strip tags and decode entities in code blocks
parent
536bbea0ea
commit
15c455cd24
11
index.js
11
index.js
|
@ -8,6 +8,7 @@ const table_to_markdown = require('./html_table_to_markdown.js');
|
|||
const validURL = require('@7c/validurl');
|
||||
const express = require('express');
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const htmlentities = require('html-entities');
|
||||
|
||||
const port = process.env.PORT;
|
||||
|
||||
|
@ -71,14 +72,14 @@ app.post('/', function(req, res) {
|
|||
if (!html) {
|
||||
res.status(400).send("Please provide a POST parameter called html");
|
||||
} else {
|
||||
try {
|
||||
//try {
|
||||
let document = new JSDOM(html);
|
||||
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
||||
send_headers(res);
|
||||
res.send(markdown);
|
||||
} catch (error) {
|
||||
res.status(400).send("Could not parse that document");
|
||||
}
|
||||
//} catch (error) {
|
||||
// res.status(400).send("Could not parse that document");
|
||||
//}
|
||||
}
|
||||
|
||||
});
|
||||
|
@ -172,6 +173,8 @@ function code_block_to_markdown (html) {
|
|||
const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/ig.exec(inner_html);
|
||||
if (match_code && match_code[1])
|
||||
inner_html = match_code[1];
|
||||
inner_html = inner_html.replace(/(<([^>]+)>)/ig, "");
|
||||
inner_html = htmlentities.decode(inner_html);
|
||||
const markdown = "```\n"+inner_html+"\n```\n";
|
||||
return markdown;
|
||||
}
|
||||
|
|
|
@ -485,9 +485,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/html-entities": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
|
||||
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.3.tgz",
|
||||
"integrity": "sha512-DV5Ln36z34NNTDgnz0EWGBLZENelNAtkiFA4kyNOG2tDI6Mz1uSWiq1wAKdyjnJwyDiDO7Fa2SO1CTxPXL8VxA=="
|
||||
},
|
||||
"node_modules/http-errors": {
|
||||
"version": "1.8.1",
|
||||
|
|
|
@ -1,205 +0,0 @@
|
|||
2.3.2
|
||||
-----
|
||||
|
||||
* Minimize data files, remove unnecessary files.
|
||||
|
||||
2.3.1
|
||||
-----
|
||||
|
||||
* Improve performance of `encode()`, `decode()` and `decodeEntity()` by using function inlining.
|
||||
* Fix decoding HEX HTML entities in some cases.
|
||||
|
||||
2.3.0
|
||||
-----
|
||||
|
||||
* Add flow types.
|
||||
|
||||
2.2.0
|
||||
-----
|
||||
|
||||
* A fast `decodeEntity()` method to decode a single HTML entity.
|
||||
|
||||
2.1.1
|
||||
-----
|
||||
|
||||
* Speed up both `encode()` and `decode()` methods.
|
||||
|
||||
2.1.0
|
||||
-----
|
||||
|
||||
* Add `extensive` mode to `encode()` method. This mode encodes all non-printable characters, non-ASCII characters and all characters with named references.
|
||||
|
||||
2.0.6
|
||||
-----
|
||||
|
||||
* Handle invalid numeric HTML entities: mimic browser behaviour.
|
||||
|
||||
2.0.5
|
||||
-----
|
||||
|
||||
* Handling behaviour of ambiguous ampersands.
|
||||
|
||||
2.0.4
|
||||
-----
|
||||
|
||||
* Fix webpack build warning.
|
||||
|
||||
2.0.3
|
||||
-----
|
||||
|
||||
* Handle invalid numeric HTML entities.
|
||||
|
||||
2.0.2
|
||||
-----
|
||||
|
||||
* Handle `null` and `undefined` text values.
|
||||
|
||||
2.0.1
|
||||
-----
|
||||
|
||||
* Fix decoding numeric HTML entities.
|
||||
|
||||
2.0.0
|
||||
-----
|
||||
|
||||
* Performance was greatly improved.
|
||||
* New API: simpler and more flexible.
|
||||
|
||||
`htmlEntitiesInstance.encode(text)` -> `encode(text)`
|
||||
|
||||
Before:
|
||||
|
||||
```js
|
||||
import {AllHtmlEntities} from 'html-entities';
|
||||
|
||||
const entities = new AllHtmlEntities();
|
||||
console.log(
|
||||
entities.encode('<Hello & World>')
|
||||
);
|
||||
```
|
||||
|
||||
After:
|
||||
|
||||
```js
|
||||
import {encode} from 'html-entities';
|
||||
|
||||
console.log(
|
||||
encode('<Hello & World>')
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
`instance.encodeNonASCII(text)` -> `encode(text, {mode: 'nonAscii'})`
|
||||
|
||||
Before:
|
||||
|
||||
```js
|
||||
import {AllHtmlEntities} from 'html-entities';
|
||||
|
||||
const entities = new AllHtmlEntities();
|
||||
console.log(
|
||||
entities.encodeNonASCII('& © ∆')
|
||||
);
|
||||
```
|
||||
|
||||
After:
|
||||
|
||||
```js
|
||||
import {encode} from 'html-entities';
|
||||
|
||||
console.log(
|
||||
encode('& © ∆', {mode: 'nonAscii'})
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
`instance.encodeNonASCII(text)` -> `encode(text, {mode: 'nonAsciiPrintable'})`
|
||||
|
||||
Before:
|
||||
|
||||
```js
|
||||
import {AllHtmlEntities} from 'html-entities';
|
||||
|
||||
const entities = new AllHtmlEntities();
|
||||
console.log(
|
||||
entities.encodeNonASCII('& © ∆ \x01')
|
||||
);
|
||||
```
|
||||
|
||||
After:
|
||||
|
||||
```js
|
||||
import {encode} from 'html-entities';
|
||||
|
||||
console.log(
|
||||
encode('& © ∆ \x01', {mode: 'nonAsciiPrintable'})
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
`instance.decode(text)` -> `decode(text)`
|
||||
|
||||
Before:
|
||||
|
||||
```js
|
||||
import {AllHtmlEntities} from 'html-entities';
|
||||
|
||||
const entities = new AllHtmlEntities();
|
||||
console.log(
|
||||
entities.decode('&lt;&gt;&amp;')
|
||||
);
|
||||
```
|
||||
|
||||
After:
|
||||
|
||||
```js
|
||||
import {decode} from 'html-entities';
|
||||
|
||||
console.log(
|
||||
decode('&lt;&gt;&amp;')
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Different XML/HTML versions are now implemented via options instead of different classes.
|
||||
|
||||
Before:
|
||||
|
||||
```js
|
||||
import {XmlEntities, Html4Entities, Html5Entities, AllHtmlEntities} from 'html-entities';
|
||||
|
||||
const xmlEntities = new XmlEntities();
|
||||
const html4Entities = new Html4Entities();
|
||||
const html5Entities = new Html5Entities();
|
||||
const allHtmlEntities = new AllHtmlEntities();
|
||||
|
||||
console.log(xmlEntities.encode('<>&'));
|
||||
console.log(html4Entities.encode('<>&©'));
|
||||
console.log(html5Entities.encode('<>&©℞'));
|
||||
console.log(allHtmlEntities.encode('<>&©℞'));
|
||||
|
||||
console.log(xmlEntities.decode('&lt;&gt;&amp;'));
|
||||
console.log(html4Entities.decode('&lt;&gt;&amp;&copy;'));
|
||||
console.log(html5Entities.decode('&lt;&gt;&amp;&copy;&rx;'));
|
||||
console.log(allHtmlEntities.decode('&lt;&gt;&amp;&copy;&rx;'));
|
||||
```
|
||||
|
||||
After:
|
||||
|
||||
```js
|
||||
import {encode, decode} from 'html-entities';
|
||||
|
||||
console.log(encode('<>&', {level: 'xml'}));
|
||||
console.log(encode('<>&©', {level: 'html4', mode: 'nonAscii'}));
|
||||
console.log(encode('<>&©℞', {level: 'html5', mode: 'nonAscii'}));
|
||||
console.log(encode('<>&©℞', {level: 'all', mode: 'nonAscii'}));
|
||||
|
||||
console.log(decode('&lt;&gt;&amp;', {level: 'xml'}));
|
||||
console.log(decode('&lt;&gt;&amp;&copy;', {level: 'html4'}));
|
||||
console.log(decode('&lt;&gt;&amp;&copy;&rx;', {level: 'html5'}));
|
||||
console.log(decode('&lt;&gt;&amp;&copy;&rx;', {level: 'all'}));
|
||||
```
|
|
@ -124,68 +124,68 @@ Common
|
|||
|
||||
Initialization / Load speed
|
||||
|
||||
* #1: html-entities x 2,544,400 ops/sec ±4.52% (77 runs sampled)
|
||||
#2: entities x 1,757,526 ops/sec ±3.99% (81 runs sampled)
|
||||
#3: he x 1,281,542 ops/sec ±9.31% (74 runs sampled)
|
||||
* #1: html-entities x 2,632,942 ops/sec ±3.71% (72 runs sampled)
|
||||
#2: entities x 1,379,154 ops/sec ±5.87% (75 runs sampled)
|
||||
#3: he x 1,334,035 ops/sec ±3.14% (83 runs sampled)
|
||||
|
||||
HTML5
|
||||
|
||||
Encode test
|
||||
|
||||
* #1: html-entities.encode - html5, nonAscii x 402,711 ops/sec ±0.61% (92 runs sampled)
|
||||
* #2: html-entities.encode - html5, nonAsciiPrintable x 402,631 ops/sec ±2.99% (92 runs sampled)
|
||||
* #3: html-entities.encode - html5, extensive x 269,162 ops/sec ±0.26% (97 runs sampled)
|
||||
#4: entities.encodeNonAsciiHTML x 260,447 ops/sec ±2.53% (95 runs sampled)
|
||||
#5: entities.encodeHTML x 101,059 ops/sec ±3.99% (91 runs sampled)
|
||||
#6: he.encode x 93,180 ops/sec ±3.17% (92 runs sampled)
|
||||
* #1: html-entities.encode - html5, nonAscii x 415,806 ops/sec ±0.73% (85 runs sampled)
|
||||
* #2: html-entities.encode - html5, nonAsciiPrintable x 401,420 ops/sec ±0.35% (93 runs sampled)
|
||||
#3: entities.encodeNonAsciiHTML x 401,235 ops/sec ±0.41% (88 runs sampled)
|
||||
#4: entities.encodeHTML x 284,868 ops/sec ±0.45% (93 runs sampled)
|
||||
* #5: html-entities.encode - html5, extensive x 237,613 ops/sec ±0.42% (93 runs sampled)
|
||||
#6: he.encode x 91,459 ops/sec ±0.50% (84 runs sampled)
|
||||
|
||||
Decode test
|
||||
|
||||
* #1: html-entities.decode - html5, attribute x 340,043 ops/sec ±2.82% (92 runs sampled)
|
||||
* #2: html-entities.decode - html5, body x 330,002 ops/sec ±1.52% (87 runs sampled)
|
||||
* #3: html-entities.decode - html5, strict x 320,582 ops/sec ±5.34% (88 runs sampled)
|
||||
#4: entities.decodeHTMLStrict x 286,294 ops/sec ±3.14% (89 runs sampled)
|
||||
#5: entities.decodeHTML x 232,856 ops/sec ±3.05% (90 runs sampled)
|
||||
#6: he.decode x 163,300 ops/sec ±0.62% (92 runs sampled)
|
||||
#1: entities.decodeHTMLStrict x 614,920 ops/sec ±0.41% (89 runs sampled)
|
||||
#2: entities.decodeHTML x 577,698 ops/sec ±0.44% (90 runs sampled)
|
||||
* #3: html-entities.decode - html5, strict x 323,680 ops/sec ±0.39% (92 runs sampled)
|
||||
* #4: html-entities.decode - html5, body x 297,548 ops/sec ±0.45% (91 runs sampled)
|
||||
* #5: html-entities.decode - html5, attribute x 293,617 ops/sec ±0.37% (94 runs sampled)
|
||||
#6: he.decode x 145,383 ops/sec ±0.36% (94 runs sampled)
|
||||
|
||||
HTML4
|
||||
|
||||
Encode test
|
||||
|
||||
* #1: html-entities.encode - html4, nonAsciiPrintable x 391,885 ops/sec ±0.27% (95 runs sampled)
|
||||
* #2: html-entities.encode - html4, nonAscii x 400,086 ops/sec ±2.54% (94 runs sampled)
|
||||
* #3: html-entities.encode - html4, extensive x 193,623 ops/sec ±2.70% (92 runs sampled)
|
||||
* #1: html-entities.encode - html4, nonAscii x 379,799 ops/sec ±0.29% (96 runs sampled)
|
||||
* #2: html-entities.encode - html4, nonAsciiPrintable x 350,003 ops/sec ±0.42% (92 runs sampled)
|
||||
* #3: html-entities.encode - html4, extensive x 169,759 ops/sec ±0.43% (90 runs sampled)
|
||||
|
||||
Decode test
|
||||
|
||||
* #1: html-entities.decode - html4, attribute x 356,174 ops/sec ±0.49% (96 runs sampled)
|
||||
* #2: html-entities.decode - html4, body x 342,666 ops/sec ±2.38% (91 runs sampled)
|
||||
* #3: html-entities.decode - html4, strict x 341,667 ops/sec ±4.46% (87 runs sampled)
|
||||
* #1: html-entities.decode - html4, attribute x 291,048 ops/sec ±0.42% (92 runs sampled)
|
||||
* #2: html-entities.decode - html4, strict x 287,110 ops/sec ±0.56% (93 runs sampled)
|
||||
* #3: html-entities.decode - html4, body x 285,529 ops/sec ±0.57% (93 runs sampled)
|
||||
|
||||
XML
|
||||
|
||||
Encode test
|
||||
|
||||
* #1: html-entities.encode - xml, nonAscii x 450,968 ops/sec ±2.73% (92 runs sampled)
|
||||
* #2: html-entities.encode - xml, nonAsciiPrintable x 432,058 ops/sec ±4.12% (93 runs sampled)
|
||||
* #3: html-entities.encode - xml, extensive x 265,336 ops/sec ±3.41% (93 runs sampled)
|
||||
#4: entities.encodeXML x 254,862 ops/sec ±3.01% (95 runs sampled)
|
||||
#1: entities.encodeXML x 418,561 ops/sec ±0.80% (90 runs sampled)
|
||||
* #2: html-entities.encode - xml, nonAsciiPrintable x 402,868 ops/sec ±0.30% (89 runs sampled)
|
||||
* #3: html-entities.encode - xml, nonAscii x 403,669 ops/sec ±7.87% (83 runs sampled)
|
||||
* #4: html-entities.encode - xml, extensive x 237,766 ops/sec ±0.45% (93 runs sampled)
|
||||
|
||||
Decode test
|
||||
|
||||
* #1: html-entities.decode - xml, strict x 432,820 ops/sec ±0.53% (89 runs sampled)
|
||||
* #2: html-entities.decode - xml, attribute x 426,037 ops/sec ±0.75% (94 runs sampled)
|
||||
* #3: html-entities.decode - xml, body x 424,618 ops/sec ±3.47% (93 runs sampled)
|
||||
#4: entities.decodeXML x 378,536 ops/sec ±2.48% (93 runs sampled)
|
||||
#1: entities.decodeXML x 888,700 ops/sec ±0.48% (93 runs sampled)
|
||||
* #2: html-entities.decode - xml, strict x 353,127 ops/sec ±0.40% (92 runs sampled)
|
||||
* #3: html-entities.decode - xml, body x 355,796 ops/sec ±1.58% (86 runs sampled)
|
||||
* #4: html-entities.decode - xml, attribute x 369,454 ops/sec ±8.74% (84 runs sampled)
|
||||
|
||||
Escaping
|
||||
|
||||
Escape test
|
||||
|
||||
* #1: html-entities.encode - xml, specialChars x 1,424,362 ops/sec ±0.55% (95 runs sampled)
|
||||
#2: he.escape x 962,420 ops/sec ±3.12% (94 runs sampled)
|
||||
#3: entities.escapeUTF8 x 443,138 ops/sec ±1.06% (90 runs sampled)
|
||||
#4: entities.escape x 197,515 ops/sec ±2.73% (91 runs sampled)
|
||||
#1: entities.escapeUTF8 x 1,308,013 ops/sec ±0.37% (91 runs sampled)
|
||||
* #2: html-entities.encode - xml, specialChars x 1,258,760 ops/sec ±1.00% (93 runs sampled)
|
||||
#3: he.escape x 822,569 ops/sec ±0.24% (94 runs sampled)
|
||||
#4: entities.escape x 434,243 ops/sec ±0.34% (91 runs sampled)
|
||||
```
|
||||
|
||||
License
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "html-entities",
|
||||
"version": "2.3.2",
|
||||
"version": "2.3.3",
|
||||
"description": "Fastest HTML entities encode/decode library.",
|
||||
"keywords": [
|
||||
"html",
|
||||
|
@ -15,7 +15,6 @@
|
|||
"name": "Marat Dulin",
|
||||
"email": "mdevils@yandex.ru"
|
||||
},
|
||||
"dependencies": {},
|
||||
"devDependencies": {
|
||||
"@types/benchmark": "^2.1.0",
|
||||
"@types/chai": "^4.2.11",
|
||||
|
@ -26,7 +25,7 @@
|
|||
"@typescript-eslint/parser": "^4.6.1",
|
||||
"benchmark": "^2.1.4",
|
||||
"chai": "^4.2.0",
|
||||
"entities": "^2.2.0",
|
||||
"entities": "^3.0.1",
|
||||
"eslint": "^7.12.1",
|
||||
"eslint-config-prettier": "^6.15.0",
|
||||
"eslint-plugin-import": "^2.22.1",
|
||||
|
@ -34,7 +33,7 @@
|
|||
"flowgen": "^1.13.0",
|
||||
"he": "^1.2.0",
|
||||
"husky": "^4.3.6",
|
||||
"mocha": "^7.1.2",
|
||||
"mocha": "^9.1.3",
|
||||
"prettier": "^2.1.2",
|
||||
"terser": "^5.6.1",
|
||||
"ts-node": "^8.9.1",
|
||||
|
@ -46,19 +45,20 @@
|
|||
"type": "git",
|
||||
"url": "https://github.com/mdevils/html-entities.git"
|
||||
},
|
||||
"sideEffects": false,
|
||||
"main": "./lib/index.js",
|
||||
"typings": "./lib/index.d.ts",
|
||||
"types": "./lib/index.d.ts",
|
||||
"scripts": {
|
||||
"test": "TS_NODE_COMPILER=ttypescript mocha --recursive -r ts-node/register test/**/*.ts",
|
||||
"test:lib": "TEST_LIB=1 yarn test",
|
||||
"test:lib": "TEST_LIB=1 npm run test",
|
||||
"benchmark": "TS_NODE_COMPILER=ttypescript ts-node benchmark/benchmark",
|
||||
"lint": "eslint src/**.ts",
|
||||
"flow-type-gen": "flowgen --add-flow-header lib/index.d.ts -o lib/index.js.flow",
|
||||
"remove-unused-declarations": "find lib -type f \\( -name '*.d.ts' ! -name index.d.ts \\) | xargs rm",
|
||||
"minimize-lib-files": "find lib -type f \\( -name '*.js' ! -name index.js \\) | while read fn; do terser $fn -o $fn; done",
|
||||
"build": "rm -Rf lib/* && ttsc && yarn remove-unused-declarations && yarn flow-type-gen && yarn minimize-lib-files && yarn test:lib",
|
||||
"prepublishOnly": "yarn build"
|
||||
"build": "rm -Rf lib/* && ttsc && npm run remove-unused-declarations && npm run flow-type-gen && npm run minimize-lib-files && npm run test:lib",
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"files": [
|
||||
"lib",
|
||||
|
@ -66,7 +66,7 @@
|
|||
],
|
||||
"husky": {
|
||||
"hooks": {
|
||||
"pre-commit": "yarn lint && yarn test"
|
||||
"pre-commit": "npm run lint && npm run test"
|
||||
}
|
||||
},
|
||||
"license": "MIT"
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
"@mozilla/readability": "^0.3.0",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^6.0.5",
|
||||
"html-entities": "^2.3.2",
|
||||
"html-entities": "^2.3.3",
|
||||
"jsdom": "^16.4.0",
|
||||
"justify-text": "^1.1.3",
|
||||
"turndown": "^7.0.0",
|
||||
|
@ -501,9 +501,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/html-entities": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
|
||||
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.3.tgz",
|
||||
"integrity": "sha512-DV5Ln36z34NNTDgnz0EWGBLZENelNAtkiFA4kyNOG2tDI6Mz1uSWiq1wAKdyjnJwyDiDO7Fa2SO1CTxPXL8VxA=="
|
||||
},
|
||||
"node_modules/http-errors": {
|
||||
"version": "1.8.1",
|
||||
|
@ -1545,9 +1545,9 @@
|
|||
}
|
||||
},
|
||||
"html-entities": {
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
|
||||
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.3.tgz",
|
||||
"integrity": "sha512-DV5Ln36z34NNTDgnz0EWGBLZENelNAtkiFA4kyNOG2tDI6Mz1uSWiq1wAKdyjnJwyDiDO7Fa2SO1CTxPXL8VxA=="
|
||||
},
|
||||
"http-errors": {
|
||||
"version": "1.8.1",
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
"@mozilla/readability": "^0.3.0",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^6.0.5",
|
||||
"html-entities": "^2.3.2",
|
||||
"html-entities": "^2.3.3",
|
||||
"jsdom": "^16.4.0",
|
||||
"justify-text": "^1.1.3",
|
||||
"turndown": "^7.0.0",
|
||||
|
|
Loading…
Reference in New Issue