custom table handler

main
Lee Hanken 2022-01-30 15:04:54 +00:00
parent 4db476881a
commit a084fa23f5
21 changed files with 901 additions and 924 deletions

124
index.js
View File

@ -1,20 +1,18 @@
const https = require('https');
const turndown = require('turndown');
const turndownPluginGfm = require('turndown-plugin-gfm')
const { Readability } = require('@mozilla/readability');
const JSDOM = require('jsdom').JSDOM;
const common_filters = require('./url_to_markdown_common_filters');
const validURL = require('@7c/validurl');
const express = require('express');
const rateLimit = require('express-rate-limit');
const htmlEntities = require('html-entities');
const port = process.env.PORT;
const app = express();
const service = new turndown();
const tables = turndownPluginGfm.tables
service.use(tables)
const rateLimiter = rateLimit({
windowMs: 30 * 1000,
@ -51,9 +49,9 @@ app.post('/', function(req, res) {
let markdown = process_dom(url, document, res);
send_headers(res);
res.send(markdown);
//} catch (error) {
// res.status(400).send("Could not parse that document");
//}
/*} catch (error) {
res.status(400).send("Could not parse that document");
}*/
}
});
@ -72,8 +70,13 @@ function process_dom(url, document, res) {
if (title)
res.header("X-Title", encodeURIComponent(title.textContent));
let reader = new Readability(document.window.document);
let readable = reader.parse();
let markdown = service.turndown(readable.content);
let readable = reader.parse().content;
let replacement = {placeholders:[], tables:[]}
readable = format_tables(readable, replacement);
let markdown = service.turndown(readable);
for (let i=0;i<replacement.placeholders.length;i++) {
markdown = markdown.replace(replacement.placeholders[i], replacement.tables[i]);
}
let result = (url) ? common_filters.filter(url, markdown) : markdown;
return result;
}
@ -86,3 +89,108 @@ function read_url(url, res) {
res.status(400).send("Sorry, could not fetch and convert that URL");
});
}
/**
 * Reduce an HTML fragment to single-line plain text.
 *
 * Tags (including an unterminated tag at the end of the string) are stripped,
 * every line break is removed, and HTML entities are decoded.
 *
 * @param {string} str - HTML fragment, e.g. the markup of one table cell.
 * @returns {string} The text content on a single line.
 */
function clean(str) {
  const withoutTags = str.replace(/<\/?[^>]+(>|$)/g, "");
  const onOneLine = withoutTags.replace(/(\r\n|\n|\r)/gm, "");
  return htmlEntities.decode(onOneLine);
}
/**
 * Render one HTML `<table>…</table>` string as a column-aligned pipe table.
 *
 * The output starts with a newline, followed by the cleaned caption (if any)
 * and a blank line, then the header row, a dash delimiter row and the
 * remaining rows. The first row is always used as the header, so a
 * single-row table keeps its content. Short rows are padded with empty cells
 * so every row has the same number of columns.
 *
 * A table without rows, or whose rows contain no cells, produces only the
 * leading newline (plus caption) instead of throwing.
 *
 * @param {string} table - Markup of a single table element.
 * @returns {string} The table as text; cell text is produced by `clean`.
 */
function format_table(table) {
  let result = "\n";

  const caption = table.match(/<caption[^>]*>((?:.|\n)*)<\/caption>/i);
  if (caption) {
    result += clean(caption[1]) + "\n\n";
  }

  // String.prototype.match returns null (not []) when nothing matches, so
  // fall back to an empty list for row-less tables and cell-less rows.
  const rows = table.match(/<tr[^>]*>(?:.|\n)*?<\/tr>/gi) || [];
  const items = rows.map((row) => {
    const cells = row.match(/<t[hd][^>]*>(?:.|\n)*?<\/t[hd]>/gi) || [];
    return cells.map((cell) => clean(cell));
  });

  const n_cols = items.reduce((widest, row) => Math.max(widest, row.length), 0);
  if (items.length === 0 || n_cols === 0) {
    return result;
  }

  // Start at 1 so an all-empty column still gets a hyphen in the delimiter
  // row; a delimiter cell without any hyphen is not a valid table delimiter.
  const column_widths = new Array(n_cols).fill(1);
  for (const row of items) {
    while (row.length < n_cols) {
      row.push("");
    }
    row.forEach((text, c) => {
      column_widths[c] = Math.max(column_widths[c], text.length);
    });
  }

  const render_row = (row) =>
    "|" + row.map((text, c) => text.padEnd(column_widths[c], " ")).join("|") + "|\n";

  result += render_row(items[0]);
  result += "|" + column_widths.map((width) => "-".repeat(width)).join("|") + "|\n";
  for (const row of items.slice(1)) {
    result += render_row(row);
  }
  return result;
}
/**
 * Swap every `<table>` in an HTML string for a placeholder paragraph and
 * record the pre-rendered table text for later substitution.
 *
 * For the t-th table found, `replacements.placeholders[t]` receives a unique
 * placeholder token and `replacements.tables[t]` the output of
 * `format_table`. HTML without any table is returned unchanged and
 * `replacements` is left untouched (previously this case threw a TypeError
 * because `String.prototype.match` returns null when nothing matches).
 *
 * @param {string} html - Document markup.
 * @param {{placeholders: string[], tables: string[]}} replacements - Filled in place.
 * @returns {string} The markup with each table replaced by `<p>placeholder</p>`.
 */
function format_tables(html, replacements) {
  let t = 0;
  // A replacer callback substitutes each match where it was found and does
  // nothing at all when the document contains no tables.
  return html.replace(/<table[^>]*>(?:.|\n)*?<\/table>/gi, (table) => {
    const markdown = format_table(table);
    const placeholder = "urltomarkdowntableplaceholder" + t + Math.random();
    replacements.placeholders[t] = placeholder;
    replacements.tables[t] = markdown;
    t += 1;
    return "<p>" + placeholder + "</p>";
  });
}

10
node_modules/.package-lock.json generated vendored
View File

@ -484,6 +484,11 @@
"node": ">=10"
}
},
"node_modules/html-entities": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
},
"node_modules/http-errors": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.8.1.tgz",
@ -990,11 +995,6 @@
"domino": "^2.1.6"
}
},
"node_modules/turndown-plugin-gfm": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
},
"node_modules/type-check": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",

205
node_modules/html-entities/CHANGELOG.md generated vendored Normal file
View File

@ -0,0 +1,205 @@
2.3.2
-----
* Minimize data files, remove unnecessary files.
2.3.1
-----
* Improve performance of `encode()`, `decode()` and `decodeEntity()` by using function inlining.
* Fix decoding HEX HTML entities in some cases.
2.3.0
-----
* Add flow types.
2.2.0
-----
* A fast `decodeEntity()` method to decode a single HTML entity.
2.1.1
-----
* Speed up both `encode()` and `decode()` methods.
2.1.0
-----
* Add `extensive` mode to `encode()` method. This mode encodes all non-printable characters, non-ASCII characters and all characters with named references.
2.0.6
-----
* Handle invalid numeric HTML entities: mimic browser behaviour.
2.0.5
-----
* Handling behaviour of ambiguous ampersands.
2.0.4
-----
* Fix webpack build warning.
2.0.3
-----
* Handle invalid numeric HTML entities.
2.0.2
-----
* Handle `null` and `undefined` text values.
2.0.1
-----
* Fix decoding numeric HTML entities.
2.0.0
-----
* Performance was greatly improved.
* New API: simpler and more flexible.
`htmlEntitiesInstance.encode(text)` -> `encode(text)`
Before:
```js
import {AllHtmlEntities} from 'html-entities';
const entities = new AllHtmlEntities();
console.log(
entities.encode('<Hello & World>')
);
```
After:
```js
import {encode} from 'html-entities';
console.log(
encode('<Hello & World>')
);
```
---
`instance.encodeNonASCII(text)` -> `encode(text, {mode: 'nonAscii'})`
Before:
```js
import {AllHtmlEntities} from 'html-entities';
const entities = new AllHtmlEntities();
console.log(
entities.encodeNonASCII('& © ∆')
);
```
After:
```js
import {encode} from 'html-entities';
console.log(
encode('& © ∆', {mode: 'nonAscii'})
);
```
---
`instance.encodeNonASCII(text)` -> `encode(text, {mode: 'nonAsciiPrintable'})`
Before:
```js
import {AllHtmlEntities} from 'html-entities';
const entities = new AllHtmlEntities();
console.log(
entities.encodeNonASCII('& © ∆ \x01')
);
```
After:
```js
import {encode} from 'html-entities';
console.log(
encode('& © ∆ \x01', {mode: 'nonAsciiPrintable'})
);
```
---
`instance.decode(text)` -> `decode(text)`
Before:
```js
import {AllHtmlEntities} from 'html-entities';
const entities = new AllHtmlEntities();
console.log(
entities.decode('&lt;&gt;&amp;')
);
```
After:
```js
import {decode} from 'html-entities';
console.log(
decode('&lt;&gt;&amp;')
);
```
---
Different XML/HTML versions are now implemented via options instead of different classes.
Before:
```js
import {XmlEntities, Html4Entities, Html5Entities, AllHtmlEntities} from 'html-entities';
const xmlEntities = new XmlEntities();
const html4Entities = new Html4Entities();
const html5Entities = new Html5Entities();
const allHtmlEntities = new AllHtmlEntities();
console.log(xmlEntities.encode('<>&'));
console.log(html4Entities.encode('<>&©'));
console.log(html5Entities.encode('<>&©℞'));
console.log(allHtmlEntities.encode('<>&©℞'));
console.log(xmlEntities.decode('&lt;&gt;&amp;'));
console.log(html4Entities.decode('&lt;&gt;&amp;&copy;'));
console.log(html5Entities.decode('&lt;&gt;&amp;&copy;&rx;'));
console.log(allHtmlEntities.decode('&lt;&gt;&amp;&copy;&rx;'));
```
After:
```js
import {encode, decode} from 'html-entities';
console.log(encode('<>&', {level: 'xml'}));
console.log(encode('<>&©', {level: 'html4', mode: 'nonAscii'}));
console.log(encode('<>&©℞', {level: 'html5', mode: 'nonAscii'}));
console.log(encode('<>&©℞', {level: 'all', mode: 'nonAscii'}));
console.log(decode('&lt;&gt;&amp;', {level: 'xml'}));
console.log(decode('&lt;&gt;&amp;&copy;', {level: 'html4'}));
console.log(decode('&lt;&gt;&amp;&copy;&rx;', {level: 'html5'}));
console.log(decode('&lt;&gt;&amp;&copy;&rx;', {level: 'all'}));
```

View File

@ -1,6 +1,4 @@
MIT License
Copyright (c) 2017 Dom Christie
Copyright (c) 2021 Dulin Marat
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@ -9,13 +7,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

213
node_modules/html-entities/README.md generated vendored Normal file
View File

@ -0,0 +1,213 @@
html-entities
=============
Fastest HTML entities library.
Comes with both TypeScript and Flow types.
Installation
------------
```bash
$ npm install html-entities
```
Usage
-----
### encode(text, options)
Encodes text replacing HTML special characters (`<>&"'`) plus other character ranges depending on `mode` option value.
```js
import {encode} from 'html-entities';
encode('< > " \' & © ∆');
// -> '&lt; &gt; &quot; &apos; &amp; © ∆'
encode('< ©', {mode: 'nonAsciiPrintable'});
// -> '&lt; &copy;'
encode('< ©', {mode: 'nonAsciiPrintable', level: 'xml'});
// -> '&lt; &#169;'
```
Options:
#### level
* `all` alias to `html5` (default).
* `html5` uses `HTML5` named references.
* `html4` uses `HTML4` named references.
* `xml` uses `XML` named references.
#### mode
* `specialChars` encodes only HTML special characters (default).
* `nonAscii` encodes HTML special characters and everything outside of the [ASCII character range](https://en.wikipedia.org/wiki/ASCII).
* `nonAsciiPrintable` encodes HTML special characters and everything outside of the [ASCII printable characters](https://en.wikipedia.org/wiki/ASCII#Printable_characters).
* `extensive` encodes all non-printable characters, non-ASCII characters and all characters with named references.
#### numeric
* `decimal` uses decimal numbers when encoding html entities. i.e. `&#169;` (default).
* `hexadecimal` uses hexadecimal numbers when encoding html entities. i.e. `&#xa9;`.
### decode(text, options)
Decodes text replacing entities to characters. Unknown entities are left as is.
```js
import {decode} from 'html-entities';
decode('&lt; &gt; &quot; &apos; &amp; &#169; &#8710;');
// -> '< > " \' & © ∆'
decode('&copy;', {level: 'html5'});
// -> '©'
decode('&copy;', {level: 'xml'});
// -> '&copy;'
```
Options:
#### level
* `all` alias to `html5` (default).
* `html5` uses `HTML5` named references.
* `html4` uses `HTML4` named references.
* `xml` uses `XML` named references.
#### scope
* `body` emulates behavior of browser when parsing tag bodies: entities without semicolon are also replaced (default).
* `attribute` emulates behavior of browser when parsing tag attributes: entities without semicolon are replaced when not followed by equality sign `=`.
* `strict` ignores entities without semicolon.
### decodeEntity(text, options)
Decodes a single HTML entity. An unknown entity is left as is.
```js
import {decodeEntity} from 'html-entities';
decodeEntity('&lt;');
// -> '<'
decodeEntity('&copy;', {level: 'html5'});
// -> '©'
decodeEntity('&copy;', {level: 'xml'});
// -> '&copy;'
```
Options:
#### level
* `all` alias to `html5` (default).
* `html5` uses `HTML5` named references.
* `html4` uses `HTML4` named references.
* `xml` uses `XML` named references.
Performance
-----------
Statistically significant comparison with other libraries using `benchmark.js`.
Results by this library are marked with `*`.
The source code of the benchmark is available at `benchmark/benchmark.ts`.
```
Common
Initialization / Load speed
* #1: html-entities x 2,544,400 ops/sec ±4.52% (77 runs sampled)
#2: entities x 1,757,526 ops/sec ±3.99% (81 runs sampled)
#3: he x 1,281,542 ops/sec ±9.31% (74 runs sampled)
HTML5
Encode test
* #1: html-entities.encode - html5, nonAscii x 402,711 ops/sec ±0.61% (92 runs sampled)
* #2: html-entities.encode - html5, nonAsciiPrintable x 402,631 ops/sec ±2.99% (92 runs sampled)
* #3: html-entities.encode - html5, extensive x 269,162 ops/sec ±0.26% (97 runs sampled)
#4: entities.encodeNonAsciiHTML x 260,447 ops/sec ±2.53% (95 runs sampled)
#5: entities.encodeHTML x 101,059 ops/sec ±3.99% (91 runs sampled)
#6: he.encode x 93,180 ops/sec ±3.17% (92 runs sampled)
Decode test
* #1: html-entities.decode - html5, attribute x 340,043 ops/sec ±2.82% (92 runs sampled)
* #2: html-entities.decode - html5, body x 330,002 ops/sec ±1.52% (87 runs sampled)
* #3: html-entities.decode - html5, strict x 320,582 ops/sec ±5.34% (88 runs sampled)
#4: entities.decodeHTMLStrict x 286,294 ops/sec ±3.14% (89 runs sampled)
#5: entities.decodeHTML x 232,856 ops/sec ±3.05% (90 runs sampled)
#6: he.decode x 163,300 ops/sec ±0.62% (92 runs sampled)
HTML4
Encode test
* #1: html-entities.encode - html4, nonAsciiPrintable x 391,885 ops/sec ±0.27% (95 runs sampled)
* #2: html-entities.encode - html4, nonAscii x 400,086 ops/sec ±2.54% (94 runs sampled)
* #3: html-entities.encode - html4, extensive x 193,623 ops/sec ±2.70% (92 runs sampled)
Decode test
* #1: html-entities.decode - html4, attribute x 356,174 ops/sec ±0.49% (96 runs sampled)
* #2: html-entities.decode - html4, body x 342,666 ops/sec ±2.38% (91 runs sampled)
* #3: html-entities.decode - html4, strict x 341,667 ops/sec ±4.46% (87 runs sampled)
XML
Encode test
* #1: html-entities.encode - xml, nonAscii x 450,968 ops/sec ±2.73% (92 runs sampled)
* #2: html-entities.encode - xml, nonAsciiPrintable x 432,058 ops/sec ±4.12% (93 runs sampled)
* #3: html-entities.encode - xml, extensive x 265,336 ops/sec ±3.41% (93 runs sampled)
#4: entities.encodeXML x 254,862 ops/sec ±3.01% (95 runs sampled)
Decode test
* #1: html-entities.decode - xml, strict x 432,820 ops/sec ±0.53% (89 runs sampled)
* #2: html-entities.decode - xml, attribute x 426,037 ops/sec ±0.75% (94 runs sampled)
* #3: html-entities.decode - xml, body x 424,618 ops/sec ±3.47% (93 runs sampled)
#4: entities.decodeXML x 378,536 ops/sec ±2.48% (93 runs sampled)
Escaping
Escape test
* #1: html-entities.encode - xml, specialChars x 1,424,362 ops/sec ±0.55% (95 runs sampled)
#2: he.escape x 962,420 ops/sec ±3.12% (94 runs sampled)
#3: entities.escapeUTF8 x 443,138 ops/sec ±1.06% (90 runs sampled)
#4: entities.escape x 197,515 ops/sec ±2.73% (91 runs sampled)
```
License
-------
MIT
Security contact information
----------------------------
To report a security vulnerability, please use the
[Tidelift security contact](https://tidelift.com/security). Tidelift will
coordinate the fix and disclosure.
`html-entities` for enterprise
------------------------------
Available as part of the Tidelift Subscription
The maintainers of `html-entities` and thousands of other packages are working with
Tidelift to deliver commercial support and maintenance for the open source
dependencies you use to build your applications. Save time, reduce risk, and
improve code health, while paying the maintainers of the exact dependencies you
use.
[Learn more.](https://tidelift.com/subscription/pkg/npm-html-entities?utm_source=npm-html-entities&utm_medium=referral&utm_campaign=enterprise)

20
node_modules/html-entities/lib/index.d.ts generated vendored Normal file
View File

@ -0,0 +1,20 @@
export declare type Level = 'xml' | 'html4' | 'html5' | 'all';
interface CommonOptions {
level?: Level;
}
export declare type EncodeMode = 'specialChars' | 'nonAscii' | 'nonAsciiPrintable' | 'extensive';
export interface EncodeOptions extends CommonOptions {
mode?: EncodeMode;
numeric?: 'decimal' | 'hexadecimal';
}
export declare type DecodeScope = 'strict' | 'body' | 'attribute';
export interface DecodeOptions extends CommonOptions {
scope?: DecodeScope;
}
/** Encodes all the necessary (specified by `level`) characters in the text */
export declare function encode(text: string | undefined | null, { mode, numeric, level }?: EncodeOptions): string;
/** Decodes a single entity */
export declare function decodeEntity(entity: string | undefined | null, { level }?: CommonOptions): string;
/** Decodes all entities in the text */
export declare function decode(text: string | undefined | null, { level, scope }?: DecodeOptions): string;
export {};

198
node_modules/html-entities/lib/index.js generated vendored Normal file
View File

@ -0,0 +1,198 @@
"use strict";
// Shallow-merge helper: reuses an `__assign` already present on `this`,
// otherwise installs Object.assign (or an own-property copy loop when that is
// unavailable) on first call and delegates to it.
var __assign = (this && this.__assign) || function () {
    __assign = Object.assign || function (target) {
        var total = arguments.length;
        for (var index = 1; index < total; index++) {
            var source = arguments[index];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    return __assign.apply(this, arguments);
};
Object.defineProperty(exports, "__esModule", { value: true });
var named_references_1 = require("./named-references");
var numeric_unicode_map_1 = require("./numeric-unicode-map");
var surrogate_pairs_1 = require("./surrogate-pairs");
var allNamedReferences = __assign(__assign({}, named_references_1.namedReferences), { all: named_references_1.namedReferences.html5 });
// Patterns selecting the characters encode() replaces, keyed by the `mode`
// option. Each carries the `g` flag; encode() resets `lastIndex` before use.
var encodeRegExps = {
// Only the HTML special characters < > ' " &.
specialChars: /[<>'"&]/g,
// Special characters plus everything from U+0080 upwards; surrogate pairs are
// matched as one unit and unpaired surrogates are matched individually.
nonAscii: /(?:[<>'"&\u0080-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g,
// As nonAscii, additionally matching several control-character ranges and
// everything from \x7f upwards.
nonAsciiPrintable: /(?:[<>'"&\x01-\x08\x11-\x15\x17-\x1F\x7f-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g,
// Widest set: control characters, ASCII punctuation ranges and all non-ASCII.
extensive: /(?:[\x01-\x0c\x0e-\x1f\x21-\x2c\x2e-\x2f\x3a-\x40\x5b-\x60\x7b-\x7d\x7f-\uD7FF\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])/g
};
// Options encode() uses when the caller passes no options object at all.
var defaultEncodeOptions = {
mode: 'specialChars',
level: 'all',
numeric: 'decimal'
};
/**
 * Encodes all the necessary (specified by `level`) characters in the text.
 *
 * Characters selected by the `mode` pattern are replaced with the named
 * reference for the chosen `level` when one exists, otherwise with a numeric
 * reference (decimal, or hexadecimal when `numeric` is 'hexadecimal').
 * Falsy `text` yields an empty string.
 */
function encode(text, _a) {
    var options = _a === void 0 ? defaultEncodeOptions : _a;
    var requestedMode = options.mode;
    var mode = requestedMode === void 0 ? 'specialChars' : requestedMode;
    var requestedNumeric = options.numeric;
    var numeric = requestedNumeric === void 0 ? 'decimal' : requestedNumeric;
    var requestedLevel = options.level;
    var level = requestedLevel === void 0 ? 'all' : requestedLevel;
    if (!text) {
        return '';
    }
    var pattern = encodeRegExps[mode];
    var references = allNamedReferences[level].characters;
    var isHex = numeric === 'hexadecimal';
    // The patterns are shared and global, so always scan from the start.
    pattern.lastIndex = 0;
    var match = pattern.exec(text);
    if (!match) {
        return text;
    }
    var encoded = '';
    var cursor = 0;
    do {
        // Copy the untouched text between the previous match and this one.
        if (cursor !== match.index) {
            encoded += text.substring(cursor, match.index);
        }
        var chunk = match[0];
        var replacement = references[chunk];
        if (!replacement) {
            // A two-unit match is a surrogate pair; use its full code point.
            var codePoint = chunk.length > 1 ? surrogate_pairs_1.getCodePoint(chunk, 0) : chunk.charCodeAt(0);
            replacement = (isHex ? '&#x' + codePoint.toString(16) : '&#' + codePoint) + ';';
        }
        encoded += replacement;
        cursor = match.index + chunk.length;
    } while ((match = pattern.exec(text)));
    if (cursor !== text.length) {
        encoded += text.substring(cursor);
    }
    return encoded;
}
exports.encode = encode;
// Options decode() uses when the caller passes no options object at all.
var defaultDecodeOptions = {
scope: 'body',
level: 'all'
};
// Matches numeric (decimal or hex) and alphanumeric named references that end
// with a semicolon.
var strict = /&(?:#\d+|#[xX][\da-fA-F]+|[0-9a-zA-Z]+);/g;
// Same references, with an optional trailing `;` or `=`; decode() leaves a
// match ending in `=` untouched when the scope is 'attribute'.
var attribute = /&(?:#\d+|#[xX][\da-fA-F]+|[0-9a-zA-Z]+)[;=]?/g;
// Pattern lookup by level, then by scope. The `body` patterns come from the
// named-references module.
var baseDecodeRegExps = {
xml: {
strict: strict,
attribute: attribute,
body: named_references_1.bodyRegExps.xml
},
html4: {
strict: strict,
attribute: attribute,
body: named_references_1.bodyRegExps.html4
},
html5: {
strict: strict,
attribute: attribute,
body: named_references_1.bodyRegExps.html5
}
};
// 'all' is an alias for the html5 patterns.
var decodeRegExps = __assign(__assign({}, baseDecodeRegExps), { all: baseDecodeRegExps.html5 });
var fromCharCode = String.fromCharCode;
// U+FFFD, substituted for numeric references beyond the supported range.
var outOfBoundsChar = fromCharCode(65533);
// Options decodeEntity() uses when the caller passes no options object at all.
var defaultDecodeEntityOptions = {
level: 'all'
};
/**
 * Decodes a single entity.
 *
 * Named references known for `level` are looked up first; otherwise a
 * `&#…` / `&#x…` reference is decoded numerically. Anything else is returned
 * unchanged, and a falsy `entity` yields an empty string.
 */
function decodeEntity(entity, _a) {
    var requestedLevel = (_a === void 0 ? defaultDecodeEntityOptions : _a).level;
    var level = requestedLevel === void 0 ? 'all' : requestedLevel;
    if (!entity) {
        return '';
    }
    var named = allNamedReferences[level].entities[entity];
    if (named) {
        return named;
    }
    if (entity[0] !== '&' || entity[1] !== '#') {
        // Not a numeric reference: hand the input back untouched.
        return entity;
    }
    var radixMarker = entity[2];
    var code = radixMarker == 'x' || radixMarker == 'X'
        ? parseInt(entity.substr(3), 16)
        : parseInt(entity.substr(2));
    if (code >= 0x10ffff) {
        return outOfBoundsChar;
    }
    if (code > 65535) {
        return surrogate_pairs_1.fromCodePoint(code);
    }
    // Some codes are remapped through numericUnicodeMap before conversion.
    return fromCharCode(numeric_unicode_map_1.numericUnicodeMap[code] || code);
}
exports.decodeEntity = decodeEntity;
/**
 * Decodes all entities in the text.
 *
 * `level` selects the named-reference table and `scope` the matching
 * pattern; when `scope` is omitted it is 'strict' for level 'xml' and 'body'
 * otherwise. Unknown references are left as they are, and falsy `text`
 * yields an empty string.
 */
function decode(text, _a) {
    var options = _a === void 0 ? defaultDecodeOptions : _a;
    var requestedLevel = options.level;
    var level = requestedLevel === void 0 ? 'all' : requestedLevel;
    var requestedScope = options.scope;
    var scope = requestedScope;
    if (requestedScope === void 0) {
        scope = level === 'xml' ? 'strict' : 'body';
    }
    if (!text) {
        return '';
    }
    var pattern = decodeRegExps[level][scope];
    var references = allNamedReferences[level].entities;
    var isAttribute = scope === 'attribute';
    var isStrict = scope === 'strict';
    // The patterns are shared and global, so always scan from the start.
    pattern.lastIndex = 0;
    var match = pattern.exec(text);
    if (!match) {
        return text;
    }
    var decoded = '';
    var cursor = 0;
    do {
        // Copy the untouched text between the previous match and this one.
        if (cursor !== match.index) {
            decoded += text.substring(cursor, match.index);
        }
        var token = match[0];
        var lastChar = token[token.length - 1];
        var output = token;
        // In attribute scope a reference followed by `=` stays literal; in
        // strict scope a reference must end with `;` to be decoded.
        var keepVerbatim = (isAttribute && lastChar === '=') || (isStrict && lastChar !== ';');
        if (!keepVerbatim) {
            var named = references[token];
            if (named) {
                output = named;
            }
            else if (token[0] === '&' && token[1] === '#') {
                var radixMarker = token[2];
                var code = radixMarker == 'x' || radixMarker == 'X'
                    ? parseInt(token.substr(3), 16)
                    : parseInt(token.substr(2));
                if (code >= 0x10ffff) {
                    output = outOfBoundsChar;
                }
                else if (code > 65535) {
                    output = surrogate_pairs_1.fromCodePoint(code);
                }
                else {
                    output = fromCharCode(numeric_unicode_map_1.numericUnicodeMap[code] || code);
                }
            }
        }
        decoded += output;
        cursor = match.index + token.length;
    } while ((match = pattern.exec(text)));
    if (cursor !== text.length) {
        decoded += text.substring(cursor);
    }
    return decoded;
}
exports.decode = decode;

51
node_modules/html-entities/lib/index.js.flow generated vendored Normal file
View File

@ -0,0 +1,51 @@
/**
* Flowtype definitions for index
* Generated by Flowgen from a Typescript Definition
* Flowgen v1.13.0
* @flow
*/
export type Level = "xml" | "html4" | "html5" | "all";
declare interface CommonOptions {
level?: Level;
}
export type EncodeMode =
| "specialChars"
| "nonAscii"
| "nonAsciiPrintable"
| "extensive";
export type EncodeOptions = {
mode?: EncodeMode,
numeric?: "decimal" | "hexadecimal",
...
} & CommonOptions;
export type DecodeScope = "strict" | "body" | "attribute";
export type DecodeOptions = {
scope?: DecodeScope,
...
} & CommonOptions;
/**
* Encodes all the necessary (specified by `level`) characters in the text
*/
declare export function encode(
text: string | void | null,
x?: EncodeOptions
): string;
/**
* Decodes a single entity
*/
declare export function decodeEntity(
entity: string | void | null,
x?: CommonOptions
): string;
/**
* Decodes all entities in the text
*/
declare export function decode(
text: string | void | null,
x?: DecodeOptions
): string;
declare export {};

1
node_modules/html-entities/lib/named-references.js generated vendored Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.numericUnicodeMap={0:65533,128:8364,130:8218,131:402,132:8222,133:8230,134:8224,135:8225,136:710,137:8240,138:352,139:8249,140:338,142:381,145:8216,146:8217,147:8220,148:8221,149:8226,150:8211,151:8212,152:732,153:8482,154:353,155:8250,156:339,158:382,159:376};

1
node_modules/html-entities/lib/surrogate-pairs.js generated vendored Normal file
View File

@ -0,0 +1 @@
"use strict";Object.defineProperty(exports,"__esModule",{value:true});exports.fromCodePoint=String.fromCodePoint||function(astralCodePoint){return String.fromCharCode(Math.floor((astralCodePoint-65536)/1024)+55296,(astralCodePoint-65536)%1024+56320)};exports.getCodePoint=String.prototype.codePointAt?function(input,position){return input.codePointAt(position)}:function(input,position){return(input.charCodeAt(position)-55296)*1024+input.charCodeAt(position+1)-56320+65536};exports.highSurrogateFrom=55296;exports.highSurrogateTo=56319;

73
node_modules/html-entities/package.json generated vendored Normal file
View File

@ -0,0 +1,73 @@
{
"name": "html-entities",
"version": "2.3.2",
"description": "Fastest HTML entities encode/decode library.",
"keywords": [
"html",
"html entities",
"html entities encode",
"html entities decode",
"entities",
"entities encode",
"entities decode"
],
"author": {
"name": "Marat Dulin",
"email": "mdevils@yandex.ru"
},
"dependencies": {},
"devDependencies": {
"@types/benchmark": "^2.1.0",
"@types/chai": "^4.2.11",
"@types/he": "^1.1.1",
"@types/mocha": "^7.0.2",
"@types/node": "^13.13.4",
"@typescript-eslint/eslint-plugin": "^4.6.1",
"@typescript-eslint/parser": "^4.6.1",
"benchmark": "^2.1.4",
"chai": "^4.2.0",
"entities": "^2.2.0",
"eslint": "^7.12.1",
"eslint-config-prettier": "^6.15.0",
"eslint-plugin-import": "^2.22.1",
"eslint-plugin-prettier": "^3.1.4",
"flowgen": "^1.13.0",
"he": "^1.2.0",
"husky": "^4.3.6",
"mocha": "^7.1.2",
"prettier": "^2.1.2",
"terser": "^5.6.1",
"ts-node": "^8.9.1",
"ttypescript": "^1.5.12",
"typescript": "^3.8.3",
"typescript-transform-macros": "^1.1.1"
},
"repository": {
"type": "git",
"url": "https://github.com/mdevils/html-entities.git"
},
"main": "./lib/index.js",
"typings": "./lib/index.d.ts",
"types": "./lib/index.d.ts",
"scripts": {
"test": "TS_NODE_COMPILER=ttypescript mocha --recursive -r ts-node/register test/**/*.ts",
"test:lib": "TEST_LIB=1 yarn test",
"benchmark": "TS_NODE_COMPILER=ttypescript ts-node benchmark/benchmark",
"lint": "eslint src/**.ts",
"flow-type-gen": "flowgen --add-flow-header lib/index.d.ts -o lib/index.js.flow",
"remove-unused-declarations": "find lib -type f \\( -name '*.d.ts' ! -name index.d.ts \\) | xargs rm",
"minimize-lib-files": "find lib -type f \\( -name '*.js' ! -name index.js \\) | while read fn; do terser $fn -o $fn; done",
"build": "rm -Rf lib/* && ttsc && yarn remove-unused-declarations && yarn flow-type-gen && yarn minimize-lib-files && yarn test:lib",
"prepublishOnly": "yarn build"
},
"files": [
"lib",
"LICENSE"
],
"husky": {
"hooks": {
"pre-commit": "yarn lint && yarn test"
}
},
"license": "MIT"
}

View File

@ -1,50 +0,0 @@
# turndown-plugin-gfm
A [Turndown](https://github.com/domchristie/turndown) plugin which adds GitHub Flavored Markdown extensions.
## Installation
npm:
```
npm install turndown-plugin-gfm
```
Browser:
```html
<script src="https://unpkg.com/turndown/dist/turndown.js"></script>
<script src="https://unpkg.com/turndown-plugin-gfm/dist/turndown-plugin-gfm.js"></script>
```
## Usage
```js
// For Node.js
var TurndownService = require('turndown')
var turndownPluginGfm = require('turndown-plugin-gfm')
var gfm = turndownPluginGfm.gfm
var turndownService = new TurndownService()
turndownService.use(gfm)
var markdown = turndownService.turndown('<strike>Hello world!</strike>')
```
turndown-plugin-gfm is a suite of plugins which can be applied individually. The available plugins are as follows:
- `strikethrough` (for converting `<strike>`, `<s>`, and `<del>` elements)
- `tables`
- `taskListItems`
- `gfm` (which applies all of the above)
So for example, if you only wish to convert tables:
```js
var tables = require('turndown-plugin-gfm').tables
var turndownService = new TurndownService()
turndownService.use(tables)
```
## License
turndown-plugin-gfm is copyright © 2017+ Dom Christie and released under the MIT license.

View File

@ -1,165 +0,0 @@
var turndownPluginGfm = (function (exports) {
'use strict';
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
function highlightedCodeBlock (turndownService) {
turndownService.addRule('highlightedCodeBlock', {
filter: function (node) {
var firstChild = node.firstChild;
return (
node.nodeName === 'DIV' &&
highlightRegExp.test(node.className) &&
firstChild &&
firstChild.nodeName === 'PRE'
)
},
replacement: function (content, node, options) {
var className = node.className || '';
var language = (className.match(highlightRegExp) || [null, ''])[1];
return (
'\n\n' + options.fence + language + '\n' +
node.firstChild.textContent +
'\n' + options.fence + '\n\n'
)
}
});
}
function strikethrough (turndownService) {
turndownService.addRule('strikethrough', {
filter: ['del', 's', 'strike'],
replacement: function (content) {
return '~' + content + '~'
}
});
}
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;
var rules = {};
rules.tableCell = {
filter: ['th', 'td'],
replacement: function (content, node) {
return cell(content, node)
}
};
rules.tableRow = {
filter: 'tr',
replacement: function (content, node) {
var borderCells = '';
var alignMap = { left: ':--', right: '--:', center: ':-:' };
if (isHeadingRow(node)) {
for (var i = 0; i < node.childNodes.length; i++) {
var border = '---';
var align = (
node.childNodes[i].getAttribute('align') || ''
).toLowerCase();
if (align) border = alignMap[align] || border;
borderCells += cell(border, node.childNodes[i]);
}
}
return '\n' + content + (borderCells ? '\n' + borderCells : '')
}
};
rules.table = {
// Only convert tables with a heading row.
// Tables with no heading row are kept using `keep` (see below).
filter: function (node) {
return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
},
replacement: function (content) {
// Ensure there are no blank lines
content = content.replace('\n\n', '\n');
return '\n\n' + content + '\n\n'
}
};
rules.tableSection = {
filter: ['thead', 'tbody', 'tfoot'],
replacement: function (content) {
return content
}
};
/**
 * Reports whether a table row should be treated as the heading row.
 *
 * A tr is a heading row if:
 * - the parent is a THEAD
 * - or if it is the first child of the TABLE or the first TBODY (possibly
 *   following a blank THEAD)
 * - and every cell is a TH
 *
 * @param {Object|null|undefined} tr - row node to inspect. Callers pass
 *   `table.rows[0]`, which is undefined when the table has no rows.
 * @returns {boolean} true for a heading row; false otherwise, including for
 *   a missing row or a row without a parent.
 */
function isHeadingRow (tr) {
  // A table without rows has no rows[0]; report "no heading row" instead of
  // throwing on `tr.parentNode`.
  if (!tr || !tr.parentNode) return false
  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  )
}
/**
 * Reports whether `element` is a TBODY that is either the first node among
 * its siblings or directly follows a THEAD whose text is only whitespace.
 *
 * @param {Object} element - node to inspect.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  var before = element.previousSibling;
  if (element.nodeName !== 'TBODY') return false
  // Nothing precedes it: it is the first section of the table.
  if (!before) return true
  // A preceding THEAD only counts when it is blank.
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Formats one table cell. The first child of a row opens with "| "; every
 * other cell opens with a single space. All cells close with " |".
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |'
}
/**
 * Turndown plugin for tables: asks the service to keep any TABLE whose first
 * row is not a heading row, then registers every entry of `rules`.
 *
 * @param {Object} turndownService - service exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
  }
  turndownService.keep(isHeadlessTable);
  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
/**
 * Turndown plugin: registers a rule that renders a checkbox whose parent is
 * an LI as "[x] " when checked and "[ ] " otherwise.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  var checkboxRule = {
    filter: function (node) {
      if (node.type !== 'checkbox') return false
      return node.parentNode.nodeName === 'LI'
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' '
    }
  };
  turndownService.addRule('taskListItems', checkboxRule);
}
/**
 * Turndown plugin bundle: applies the highlighted-code-block, strikethrough,
 * tables and task-list plugins, in that order, through a single `use` call.
 *
 * @param {Object} turndownService - service exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
exports.gfm = gfm;
exports.highlightedCodeBlock = highlightedCodeBlock;
exports.strikethrough = strikethrough;
exports.tables = tables;
exports.taskListItems = taskListItems;
return exports;
}({}));

View File

@ -1,162 +0,0 @@
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
// Matches class names such as "highlight-source-js" or "highlight-text-html";
// capture group 1 is the language token.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin: registers a rule that turns a DIV with a highlight class
 * and a PRE first child into a fenced code block labelled with the language
 * taken from the class name.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  var codeBlockRule = {
    filter: function (node) {
      var pre = node.firstChild;
      var isHighlightDiv =
        node.nodeName === 'DIV' && highlightRegExp.test(node.className);
      return isHighlightDiv && pre && pre.nodeName === 'PRE'
    },
    replacement: function (content, node, options) {
      var fence = options.fence;
      var match = (node.className || '').match(highlightRegExp);
      // No match means no language label after the opening fence.
      var language = match ? match[1] : '';
      return (
        '\n\n' + fence + language + '\n' +
        node.firstChild.textContent +
        '\n' + fence + '\n\n'
      )
    }
  };
  turndownService.addRule('highlightedCodeBlock', codeBlockRule);
}
/**
 * Turndown plugin: registers a rule that renders <del>, <s> and <strike>
 * elements as their converted content wrapped in single tildes.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var tilde = '~';
  var strikeRule = {
    filter: ['del', 's', 'strike'],
    replacement: function (innerMarkdown) {
      return tilde + innerMarkdown + tilde
    }
  };
  turndownService.addRule('strikethrough', strikeRule);
}
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Turndown rules for tables, keyed by rule name.
var rules = {};

// Every <th>/<td> becomes one pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node)
  }
};

// Every <tr> becomes one line. A heading row is followed by a delimiter row
// whose cells ("---", ":--", "--:", ":-:") come from each cell's `align`.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };
    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var child = node.childNodes[i];
        // Only elements carry attributes; text/comment children would throw.
        if (typeof child.getAttribute !== 'function') continue
        var border = '---';
        var align = (child.getAttribute('align') || '').toLowerCase();
        // Own-property check so values such as "constructor" do not pick up
        // members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }
        borderCells += cell(border, child);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
  },
  replacement: function (content) {
    // Ensure there are no blank lines: collapse EVERY run of two or more
    // newlines (a string pattern would only replace the first occurrence).
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n'
  }
};

// <thead>/<tbody>/<tfoot> contribute no markup of their own.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content
  }
};
/**
 * Reports whether a table row should be treated as the heading row.
 *
 * A tr is a heading row if:
 * - the parent is a THEAD
 * - or if it is the first child of the TABLE or the first TBODY (possibly
 *   following a blank THEAD)
 * - and every cell is a TH
 *
 * @param {Object|null|undefined} tr - row node to inspect. Callers pass
 *   `table.rows[0]`, which is undefined when the table has no rows.
 * @returns {boolean} true for a heading row; false otherwise, including for
 *   a missing row or a row without a parent.
 */
function isHeadingRow (tr) {
  // A table without rows has no rows[0]; report "no heading row" instead of
  // throwing on `tr.parentNode`.
  if (!tr || !tr.parentNode) return false
  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  )
}
/**
 * Reports whether `element` is a TBODY that is either the first node among
 * its siblings or directly follows a THEAD whose text is only whitespace.
 *
 * @param {Object} element - node to inspect.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  var before = element.previousSibling;
  if (element.nodeName !== 'TBODY') return false
  // Nothing precedes it: it is the first section of the table.
  if (!before) return true
  // A preceding THEAD only counts when it is blank.
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Formats one table cell. The first child of a row opens with "| "; every
 * other cell opens with a single space. All cells close with " |".
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |'
}
/**
 * Turndown plugin for tables: asks the service to keep any TABLE whose first
 * row is not a heading row, then registers every entry of `rules`.
 *
 * @param {Object} turndownService - service exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
  }
  turndownService.keep(isHeadlessTable);
  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
/**
 * Turndown plugin: registers a rule that renders a checkbox whose parent is
 * an LI as "[x] " when checked and "[ ] " otherwise.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  var checkboxRule = {
    filter: function (node) {
      if (node.type !== 'checkbox') return false
      return node.parentNode.nodeName === 'LI'
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' '
    }
  };
  turndownService.addRule('taskListItems', checkboxRule);
}
/**
 * Turndown plugin bundle: applies the highlighted-code-block, strikethrough,
 * tables and task-list plugins, in that order, through a single `use` call.
 *
 * @param {Object} turndownService - service exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
exports.gfm = gfm;
exports.highlightedCodeBlock = highlightedCodeBlock;
exports.strikethrough = strikethrough;
exports.tables = tables;
exports.taskListItems = taskListItems;

View File

@ -1,154 +0,0 @@
// Matches class names such as "highlight-source-js" or "highlight-text-html";
// capture group 1 is the language token.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin: registers a rule that turns a DIV with a highlight class
 * and a PRE first child into a fenced code block labelled with the language
 * taken from the class name.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  var codeBlockRule = {
    filter: function (node) {
      var pre = node.firstChild;
      var isHighlightDiv =
        node.nodeName === 'DIV' && highlightRegExp.test(node.className);
      return isHighlightDiv && pre && pre.nodeName === 'PRE'
    },
    replacement: function (content, node, options) {
      var fence = options.fence;
      var match = (node.className || '').match(highlightRegExp);
      // No match means no language label after the opening fence.
      var language = match ? match[1] : '';
      return (
        '\n\n' + fence + language + '\n' +
        node.firstChild.textContent +
        '\n' + fence + '\n\n'
      )
    }
  };
  turndownService.addRule('highlightedCodeBlock', codeBlockRule);
}
/**
 * Turndown plugin: registers a rule that renders <del>, <s> and <strike>
 * elements as their converted content wrapped in single tildes.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var tilde = '~';
  var strikeRule = {
    filter: ['del', 's', 'strike'],
    replacement: function (innerMarkdown) {
      return tilde + innerMarkdown + tilde
    }
  };
  turndownService.addRule('strikethrough', strikeRule);
}
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Turndown rules for tables, keyed by rule name.
var rules = {};

// Every <th>/<td> becomes one pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node)
  }
};

// Every <tr> becomes one line. A heading row is followed by a delimiter row
// whose cells ("---", ":--", "--:", ":-:") come from each cell's `align`.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };
    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var child = node.childNodes[i];
        // Only elements carry attributes; text/comment children would throw.
        if (typeof child.getAttribute !== 'function') continue
        var border = '---';
        var align = (child.getAttribute('align') || '').toLowerCase();
        // Own-property check so values such as "constructor" do not pick up
        // members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }
        borderCells += cell(border, child);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
  },
  replacement: function (content) {
    // Ensure there are no blank lines: collapse EVERY run of two or more
    // newlines (a string pattern would only replace the first occurrence).
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n'
  }
};

// <thead>/<tbody>/<tfoot> contribute no markup of their own.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content
  }
};
/**
 * Reports whether a table row should be treated as the heading row.
 *
 * A tr is a heading row if:
 * - the parent is a THEAD
 * - or if it is the first child of the TABLE or the first TBODY (possibly
 *   following a blank THEAD)
 * - and every cell is a TH
 *
 * @param {Object|null|undefined} tr - row node to inspect. Callers pass
 *   `table.rows[0]`, which is undefined when the table has no rows.
 * @returns {boolean} true for a heading row; false otherwise, including for
 *   a missing row or a row without a parent.
 */
function isHeadingRow (tr) {
  // A table without rows has no rows[0]; report "no heading row" instead of
  // throwing on `tr.parentNode`.
  if (!tr || !tr.parentNode) return false
  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  )
}
/**
 * Reports whether `element` is a TBODY that is either the first node among
 * its siblings or directly follows a THEAD whose text is only whitespace.
 *
 * @param {Object} element - node to inspect.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  var before = element.previousSibling;
  if (element.nodeName !== 'TBODY') return false
  // Nothing precedes it: it is the first section of the table.
  if (!before) return true
  // A preceding THEAD only counts when it is blank.
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Formats one table cell. The first child of a row opens with "| "; every
 * other cell opens with a single space. All cells close with " |".
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |'
}
/**
 * Turndown plugin for tables: asks the service to keep any TABLE whose first
 * row is not a heading row, then registers every entry of `rules`.
 *
 * @param {Object} turndownService - service exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
  }
  turndownService.keep(isHeadlessTable);
  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
/**
 * Turndown plugin: registers a rule that renders a checkbox whose parent is
 * an LI as "[x] " when checked and "[ ] " otherwise.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  var checkboxRule = {
    filter: function (node) {
      if (node.type !== 'checkbox') return false
      return node.parentNode.nodeName === 'LI'
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' '
    }
  };
  turndownService.addRule('taskListItems', checkboxRule);
}
/**
 * Turndown plugin bundle: applies the highlighted-code-block, strikethrough,
 * tables and task-list plugins, in that order, through a single `use` call.
 *
 * @param {Object} turndownService - service exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems };

View File

@ -1,162 +0,0 @@
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
// Matches class names such as "highlight-source-js" or "highlight-text-html";
// capture group 1 is the language token.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin: registers a rule that turns a DIV with a highlight class
 * and a PRE first child into a fenced code block labelled with the language
 * taken from the class name.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  var codeBlockRule = {
    filter: function (node) {
      var pre = node.firstChild;
      var isHighlightDiv =
        node.nodeName === 'DIV' && highlightRegExp.test(node.className);
      return isHighlightDiv && pre && pre.nodeName === 'PRE'
    },
    replacement: function (content, node, options) {
      var fence = options.fence;
      var match = (node.className || '').match(highlightRegExp);
      // No match means no language label after the opening fence.
      var language = match ? match[1] : '';
      return (
        '\n\n' + fence + language + '\n' +
        node.firstChild.textContent +
        '\n' + fence + '\n\n'
      )
    }
  };
  turndownService.addRule('highlightedCodeBlock', codeBlockRule);
}
/**
 * Turndown plugin: registers a rule that renders <del>, <s> and <strike>
 * elements as their converted content wrapped in single tildes.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var tilde = '~';
  var strikeRule = {
    filter: ['del', 's', 'strike'],
    replacement: function (innerMarkdown) {
      return tilde + innerMarkdown + tilde
    }
  };
  turndownService.addRule('strikethrough', strikeRule);
}
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Turndown rules for tables, keyed by rule name.
var rules = {};

// Every <th>/<td> becomes one pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node)
  }
};

// Every <tr> becomes one line. A heading row is followed by a delimiter row
// whose cells ("---", ":--", "--:", ":-:") come from each cell's `align`.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };
    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var child = node.childNodes[i];
        // Only elements carry attributes; text/comment children would throw.
        if (typeof child.getAttribute !== 'function') continue
        var border = '---';
        var align = (child.getAttribute('align') || '').toLowerCase();
        // Own-property check so values such as "constructor" do not pick up
        // members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }
        borderCells += cell(border, child);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
  },
  replacement: function (content) {
    // Ensure there are no blank lines: collapse EVERY run of two or more
    // newlines (a string pattern would only replace the first occurrence).
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n'
  }
};

// <thead>/<tbody>/<tfoot> contribute no markup of their own.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content
  }
};
/**
 * Reports whether a table row should be treated as the heading row.
 *
 * A tr is a heading row if:
 * - the parent is a THEAD
 * - or if it is the first child of the TABLE or the first TBODY (possibly
 *   following a blank THEAD)
 * - and every cell is a TH
 *
 * @param {Object|null|undefined} tr - row node to inspect. Callers pass
 *   `table.rows[0]`, which is undefined when the table has no rows.
 * @returns {boolean} true for a heading row; false otherwise, including for
 *   a missing row or a row without a parent.
 */
function isHeadingRow (tr) {
  // A table without rows has no rows[0]; report "no heading row" instead of
  // throwing on `tr.parentNode`.
  if (!tr || !tr.parentNode) return false
  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  )
}
/**
 * Reports whether `element` is a TBODY that is either the first node among
 * its siblings or directly follows a THEAD whose text is only whitespace.
 *
 * @param {Object} element - node to inspect.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  var before = element.previousSibling;
  if (element.nodeName !== 'TBODY') return false
  // Nothing precedes it: it is the first section of the table.
  if (!before) return true
  // A preceding THEAD only counts when it is blank.
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Formats one table cell. The first child of a row opens with "| "; every
 * other cell opens with a single space. All cells close with " |".
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |'
}
/**
 * Turndown plugin for tables: asks the service to keep any TABLE whose first
 * row is not a heading row, then registers every entry of `rules`.
 *
 * @param {Object} turndownService - service exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
  }
  turndownService.keep(isHeadlessTable);
  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
/**
 * Turndown plugin: registers a rule that renders a checkbox whose parent is
 * an LI as "[x] " when checked and "[ ] " otherwise.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  var checkboxRule = {
    filter: function (node) {
      if (node.type !== 'checkbox') return false
      return node.parentNode.nodeName === 'LI'
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' '
    }
  };
  turndownService.addRule('taskListItems', checkboxRule);
}
/**
 * Turndown plugin bundle: applies the highlighted-code-block, strikethrough,
 * tables and task-list plugins, in that order, through a single `use` call.
 *
 * @param {Object} turndownService - service exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
exports.gfm = gfm;
exports.highlightedCodeBlock = highlightedCodeBlock;
exports.strikethrough = strikethrough;
exports.tables = tables;
exports.taskListItems = taskListItems;

View File

@ -1,154 +0,0 @@
// Matches class names such as "highlight-source-js" or "highlight-text-html";
// capture group 1 is the language token.
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

/**
 * Turndown plugin: registers a rule that turns a DIV with a highlight class
 * and a PRE first child into a fenced code block labelled with the language
 * taken from the class name.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function highlightedCodeBlock (turndownService) {
  var codeBlockRule = {
    filter: function (node) {
      var pre = node.firstChild;
      var isHighlightDiv =
        node.nodeName === 'DIV' && highlightRegExp.test(node.className);
      return isHighlightDiv && pre && pre.nodeName === 'PRE'
    },
    replacement: function (content, node, options) {
      var fence = options.fence;
      var match = (node.className || '').match(highlightRegExp);
      // No match means no language label after the opening fence.
      var language = match ? match[1] : '';
      return (
        '\n\n' + fence + language + '\n' +
        node.firstChild.textContent +
        '\n' + fence + '\n\n'
      )
    }
  };
  turndownService.addRule('highlightedCodeBlock', codeBlockRule);
}
/**
 * Turndown plugin: registers a rule that renders <del>, <s> and <strike>
 * elements as their converted content wrapped in single tildes.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function strikethrough (turndownService) {
  var tilde = '~';
  var strikeRule = {
    filter: ['del', 's', 'strike'],
    replacement: function (innerMarkdown) {
      return tilde + innerMarkdown + tilde
    }
  };
  turndownService.addRule('strikethrough', strikeRule);
}
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Turndown rules for tables, keyed by rule name.
var rules = {};

// Every <th>/<td> becomes one pipe-delimited cell.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node)
  }
};

// Every <tr> becomes one line. A heading row is followed by a delimiter row
// whose cells ("---", ":--", "--:", ":-:") come from each cell's `align`.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };
    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var child = node.childNodes[i];
        // Only elements carry attributes; text/comment children would throw.
        if (typeof child.getAttribute !== 'function') continue
        var border = '---';
        var align = (child.getAttribute('align') || '').toLowerCase();
        // Own-property check so values such as "constructor" do not pick up
        // members inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }
        borderCells += cell(border, child);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '')
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see `tables`).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0])
  },
  replacement: function (content) {
    // Ensure there are no blank lines: collapse EVERY run of two or more
    // newlines (a string pattern would only replace the first occurrence).
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n'
  }
};

// <thead>/<tbody>/<tfoot> contribute no markup of their own.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content
  }
};
/**
 * Reports whether a table row should be treated as the heading row.
 *
 * A tr is a heading row if:
 * - the parent is a THEAD
 * - or if it is the first child of the TABLE or the first TBODY (possibly
 *   following a blank THEAD)
 * - and every cell is a TH
 *
 * @param {Object|null|undefined} tr - row node to inspect. Callers pass
 *   `table.rows[0]`, which is undefined when the table has no rows.
 * @returns {boolean} true for a heading row; false otherwise, including for
 *   a missing row or a row without a parent.
 */
function isHeadingRow (tr) {
  // A table without rows has no rows[0]; report "no heading row" instead of
  // throwing on `tr.parentNode`.
  if (!tr || !tr.parentNode) return false
  var parentNode = tr.parentNode;
  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH' })
    )
  )
}
/**
 * Reports whether `element` is a TBODY that is either the first node among
 * its siblings or directly follows a THEAD whose text is only whitespace.
 *
 * @param {Object} element - node to inspect.
 * @returns {boolean}
 */
function isFirstTbody (element) {
  var before = element.previousSibling;
  if (element.nodeName !== 'TBODY') return false
  // Nothing precedes it: it is the first section of the table.
  if (!before) return true
  // A preceding THEAD only counts when it is blank.
  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent)
}
/**
 * Formats one table cell. The first child of a row opens with "| "; every
 * other cell opens with a single space. All cells close with " |".
 *
 * @param {string} content - text to place inside the cell.
 * @param {Object} node - cell node; its position is looked up in
 *   `node.parentNode.childNodes`.
 * @returns {string}
 */
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |'
}
/**
 * Turndown plugin for tables: asks the service to keep any TABLE whose first
 * row is not a heading row, then registers every entry of `rules`.
 *
 * @param {Object} turndownService - service exposing `keep(filter)` and
 *   `addRule(name, rule)`.
 */
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0])
  }
  turndownService.keep(isHeadlessTable);
  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
/**
 * Turndown plugin: registers a rule that renders a checkbox whose parent is
 * an LI as "[x] " when checked and "[ ] " otherwise.
 *
 * @param {Object} turndownService - service exposing `addRule(name, rule)`.
 */
function taskListItems (turndownService) {
  var checkboxRule = {
    filter: function (node) {
      if (node.type !== 'checkbox') return false
      return node.parentNode.nodeName === 'LI'
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' '
    }
  };
  turndownService.addRule('taskListItems', checkboxRule);
}
/**
 * Turndown plugin bundle: applies the highlighted-code-block, strikethrough,
 * tables and task-list plugins, in that order, through a single `use` call.
 *
 * @param {Object} turndownService - service exposing `use(plugins)`.
 */
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems };

View File

@ -1,43 +0,0 @@
{
"name": "turndown-plugin-gfm",
"description": "Turndown plugin to add GitHub Flavored Markdown extensions.",
"version": "1.0.2",
"author": "Dom Christie",
"main": "lib/turndown-plugin-gfm.cjs.js",
"module": "lib/turndown-plugin-gfm.es.js",
"jsnext:main": "lib/turndown-plugin-gfm.es.js",
"devDependencies": {
"browserify": "^14.5.0",
"rollup": "^0.50.0",
"standard": "^10.0.3",
"turndown": "4.0.1",
"turndown-attendant": "0.0.2"
},
"files": [
"lib",
"dist"
],
"keywords": [
"turndown",
"turndown-plugin",
"html-to-markdown",
"html",
"markdown",
"github-flavored-markdown",
"gfm"
],
"license": "MIT",
"repository": {
"type": "git",
"url": "https://github.com/domchristie/turndown-plugin-gfm.git"
},
"scripts": {
"build": "npm run build-cjs && npm run build-es && npm run build-iife && npm run build-test",
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
"build-iife": "rollup -c config/rollup.config.iife.js",
"build-test": "browserify test/turndown-plugin-gfm-test.js --outfile test/turndown-plugin-gfm-test.browser.js",
"prepublish": "npm run build",
"test": "npm run build && standard ./src/**/*.js && node test/turndown-plugin-gfm-test.js"
}
}

23
package-lock.json generated
View File

@ -11,12 +11,11 @@
"dependencies": {
"@7c/validurl": "^0.0.3",
"@mozilla/readability": "^0.3.0",
"body-parser": "^1.19.1",
"express": "^4.17.1",
"express-rate-limit": "^6.0.5",
"html-entities": "^2.3.2",
"jsdom": "^16.4.0",
"turndown": "^7.0.0",
"turndown-plugin-gfm": "^1.0.2",
"url": "^0.11.0"
}
},
@ -500,6 +499,11 @@
"node": ">=10"
}
},
"node_modules/html-entities": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
},
"node_modules/http-errors": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.8.1.tgz",
@ -1006,11 +1010,6 @@
"domino": "^2.1.6"
}
},
"node_modules/turndown-plugin-gfm": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
},
"node_modules/type-check": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",
@ -1539,6 +1538,11 @@
"whatwg-encoding": "^1.0.5"
}
},
"html-entities": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/html-entities/-/html-entities-2.3.2.tgz",
"integrity": "sha512-c3Ab/url5ksaT0WyleslpBEthOzWhrjQbg75y7XUsfSzi3Dgzt0l8w5e7DylRn15MTlMMD58dTfzddNS2kcAjQ=="
},
"http-errors": {
"version": "1.8.1",
"resolved": "https://registry.npmjs.org/http-errors/-/http-errors-1.8.1.tgz",
@ -1913,11 +1917,6 @@
"domino": "^2.1.6"
}
},
"turndown-plugin-gfm": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
},
"type-check": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",

View File

@ -6,12 +6,11 @@
"dependencies": {
"@7c/validurl": "^0.0.3",
"@mozilla/readability": "^0.3.0",
"body-parser": "^1.19.1",
"express": "^4.17.1",
"express-rate-limit": "^6.0.5",
"html-entities": "^2.3.2",
"jsdom": "^16.4.0",
"turndown": "^7.0.0",
"turndown-plugin-gfm": "^1.0.2",
"url": "^0.11.0"
},
"scripts": {