process tables differently
parent
9b9bc6e104
commit
4db476881a
54
index.js
54
index.js
|
@ -1,16 +1,21 @@
|
|||
const https = require('https');
|
||||
const turndown = require('turndown');
|
||||
const turndownPluginGfm = require('turndown-plugin-gfm')
|
||||
const { Readability } = require('@mozilla/readability');
|
||||
const JSDOM = require('jsdom').JSDOM;
|
||||
const common_filters = require('./url_to_markdown_common_filters');
|
||||
const validURL = require('@7c/validurl');
|
||||
const express = require('express');
|
||||
const app = express();
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const service = new turndown();
|
||||
|
||||
const port = process.env.PORT;
|
||||
|
||||
const app = express();
|
||||
|
||||
const service = new turndown();
|
||||
const tables = turndownPluginGfm.tables
|
||||
service.use(tables)
|
||||
|
||||
const rateLimiter = rateLimit({
|
||||
windowMs: 30 * 1000,
|
||||
max: 5,
|
||||
|
@ -21,7 +26,8 @@ const rateLimiter = rateLimit({
|
|||
app.use(rateLimiter);
|
||||
|
||||
app.use(express.urlencoded({
|
||||
extended: true
|
||||
extended: true,
|
||||
limit: '10mb'
|
||||
}));
|
||||
|
||||
app.get('/', (req, res) => {
|
||||
|
@ -35,21 +41,19 @@ app.get('/', (req, res) => {
|
|||
});
|
||||
|
||||
// POST /: convert the HTML supplied in the `html` form field to markdown.
// An optional `url` field is forwarded to process_dom, which uses it to
// apply the URL-specific filters to the generated markdown.
app.post('/', function(req, res) {
    // Tolerate a request whose body was not parsed into an object.
    const body = req.body || {};
    const html = body.html;
    const url = body.url;

    if (!html) {
        res.status(400).send("Please provide a POST parameter called html");
        return;
    }

    // Parsing or conversion failures are reported to the client as a 400
    // instead of escaping the handler as an unhandled exception.
    try {
        const document = new JSDOM(html);
        const markdown = process_dom(url, document, res);
        send_headers(res);
        res.send(markdown);
    } catch (error) {
        res.status(400).send("Could not parse that document");
    }
});
|
||||
|
@ -63,21 +67,21 @@ function send_headers(res) {
|
|||
res.header("Content-Type", 'text/markdown');
|
||||
}
|
||||
|
||||
/**
 * Convert a parsed document to markdown.
 *
 * Sets the `X-Title` response header (URI-encoded) when the document has a
 * <title>, runs Readability over the document and converts the extracted
 * content with the shared turndown service.
 *
 * @param {string} [url] - source URL; when truthy the markdown is passed
 *   through common_filters.filter(url, markdown).
 * @param {object} document - JSDOM instance (its `window.document` is read).
 * @param {object} res - response object; only `res.header` is used here.
 * @returns {string} the (optionally filtered) markdown.
 * @throws {Error} when Readability yields no article for the document.
 */
function process_dom(url, document, res) {
    const title = document.window.document.querySelector('title');
    if (title)
        res.header("X-Title", encodeURIComponent(title.textContent));

    const reader = new Readability(document.window.document);
    const readable = reader.parse();
    // parse() may yield nothing; fail with a clear message rather than a
    // TypeError on `readable.content`.
    if (!readable)
        throw new Error("Readability could not extract an article from the document");

    const markdown = service.turndown(readable.content);
    return (url) ? common_filters.filter(url, markdown) : markdown;
}
|
||||
|
||||
function read_url(url, res) {
|
||||
JSDOM.fromURL(url).then((document)=>{
|
||||
let markdown = process_dom(document, res);
|
||||
let result = common_filters.filter(url, markdown);
|
||||
res.send(result);
|
||||
let markdown = process_dom(url, document, res);
|
||||
res.send(markdown);
|
||||
}).catch((error)=> {
|
||||
res.status(400).send("Sorry, could not fetch and convert that URL");
|
||||
});
|
||||
|
|
|
@ -990,6 +990,11 @@
|
|||
"domino": "^2.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
|
||||
},
|
||||
"node_modules/type-check": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",
|
||||
|
@ -1051,11 +1056,6 @@
|
|||
"node": ">= 0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/valid-url": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/valid-url/-/valid-url-1.0.9.tgz",
|
||||
"integrity": "sha1-HBRHm0DxOXp1eC8RXkCGRHQzogA="
|
||||
},
|
||||
"node_modules/vary": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2017 Dom Christie
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
|
@ -0,0 +1,50 @@
|
|||
# turndown-plugin-gfm
|
||||
|
||||
A [Turndown](https://github.com/domchristie/turndown) plugin which adds GitHub Flavored Markdown extensions.
|
||||
|
||||
## Installation
|
||||
|
||||
npm:
|
||||
|
||||
```
|
||||
npm install turndown-plugin-gfm
|
||||
```
|
||||
|
||||
Browser:
|
||||
|
||||
```html
|
||||
<script src="https://unpkg.com/turndown/dist/turndown.js"></script>
|
||||
<script src="https://unpkg.com/turndown-plugin-gfm/dist/turndown-plugin-gfm.js"></script>
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```js
|
||||
// For Node.js
|
||||
var TurndownService = require('turndown')
|
||||
var turndownPluginGfm = require('turndown-plugin-gfm')
|
||||
|
||||
var gfm = turndownPluginGfm.gfm
|
||||
var turndownService = new TurndownService()
|
||||
turndownService.use(gfm)
|
||||
var markdown = turndownService.turndown('<strike>Hello world!</strike>')
|
||||
```
|
||||
|
||||
turndown-plugin-gfm is a suite of plugins which can be applied individually. The available plugins are as follows:
|
||||
|
||||
- `strikethrough` (for converting `<strike>`, `<s>`, and `<del>` elements)
|
||||
- `tables`
|
||||
- `taskListItems`
|
||||
- `gfm` (which applies all of the above)
|
||||
|
||||
So for example, if you only wish to convert tables:
|
||||
|
||||
```js
|
||||
var tables = require('turndown-plugin-gfm').tables
|
||||
var turndownService = new TurndownService()
|
||||
turndownService.use(tables)
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
turndown-plugin-gfm is copyright © 2017+ Dom Christie and released under the MIT license.
|
|
@ -0,0 +1,165 @@
|
|||
var turndownPluginGfm = (function (exports) {
|
||||
'use strict';
|
||||
|
||||
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
|
||||
|
||||
// Plugin: registers a rule that turns a DIV whose class matches
// highlightRegExp and whose first child is a PRE into a fenced code block,
// using the captured class suffix as the fence's language tag.
function highlightedCodeBlock (turndownService) {
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var opening = '\n\n' + options.fence + language + '\n';
    var closing = '\n' + options.fence + '\n\n';
    // The PRE's raw text is used, not the already-converted `content`.
    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
||||
|
||||
// Plugin: registers a rule that wraps the content of <del>, <s> and
// <strike> elements in single tildes.
function strikethrough (turndownService) {
  var wrapInTildes = function (content) {
    return '~' + content + '~';
  };

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}
|
||||
|
||||
// Array helpers borrowed so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table conversion rules, keyed by the name they are registered under.
var rules = {};

// <th>/<td>: wrap the cell content in pipe delimiters.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// <tr>: emit the row on its own line. A heading row is followed by a
// delimiter row whose cells reflect each heading cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: a value such as align="constructor"
        // must not resolve to a member inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see the `tables` plugin).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines: collapse every run of two or more
    // newlines, not just the first one.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n';
  }
};

// <thead>/<tbody>/<tfoot>: transparent containers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
||||
|
||||
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it's the first child of the TABLE or the first TBODY (possibly
//   following a blank THEAD)
// - and every cell is a TH
//
// A missing row (for example `table.rows[0]` of a table without rows) or a
// row that has no parent is never a heading row; this returns false for it
// instead of throwing.
function isHeadingRow (tr) {
  var parentNode = tr && tr.parentNode;
  if (!parentNode) return false;

  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })
    )
  );
}
|
||||
|
||||
// True when `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is empty or whitespace only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
||||
|
||||
// Render one table cell: the first child of its parent opens the row with
// '| ', every other cell starts with a single space; all cells end in ' |'.
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |';
}
|
||||
|
||||
// Plugin: passes a predicate matching TABLE elements whose first row is not
// a heading row to `keep`, then registers every entry of `rules`.
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }
  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
||||
|
||||
// Plugin: registers a rule that renders a checkbox whose parent is an LI as
// '[x] ' when checked and '[ ] ' otherwise.
function taskListItems (turndownService) {
  var rule = {
    filter: function (node) {
      var isCheckbox = node.type === 'checkbox';
      return isCheckbox && node.parentNode.nodeName === 'LI';
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };

  turndownService.addRule('taskListItems', rule);
}
|
||||
|
||||
// Plugin bundle: applies all four plugins through a single `use` call.
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
|
||||
|
||||
exports.gfm = gfm;
|
||||
exports.highlightedCodeBlock = highlightedCodeBlock;
|
||||
exports.strikethrough = strikethrough;
|
||||
exports.tables = tables;
|
||||
exports.taskListItems = taskListItems;
|
||||
|
||||
return exports;
|
||||
|
||||
}({}));
|
162
node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.cjs.js
generated
vendored
Normal file
162
node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.cjs.js
generated
vendored
Normal file
|
@ -0,0 +1,162 @@
|
|||
'use strict';
|
||||
|
||||
Object.defineProperty(exports, '__esModule', { value: true });
|
||||
|
||||
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
|
||||
|
||||
// Plugin: registers a rule that turns a DIV whose class matches
// highlightRegExp and whose first child is a PRE into a fenced code block,
// using the captured class suffix as the fence's language tag.
function highlightedCodeBlock (turndownService) {
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var opening = '\n\n' + options.fence + language + '\n';
    var closing = '\n' + options.fence + '\n\n';
    // The PRE's raw text is used, not the already-converted `content`.
    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
||||
|
||||
// Plugin: registers a rule that wraps the content of <del>, <s> and
// <strike> elements in single tildes.
function strikethrough (turndownService) {
  var wrapInTildes = function (content) {
    return '~' + content + '~';
  };

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}
|
||||
|
||||
// Array helpers borrowed so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table conversion rules, keyed by the name they are registered under.
var rules = {};

// <th>/<td>: wrap the cell content in pipe delimiters.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// <tr>: emit the row on its own line. A heading row is followed by a
// delimiter row whose cells reflect each heading cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: a value such as align="constructor"
        // must not resolve to a member inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see the `tables` plugin).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines: collapse every run of two or more
    // newlines, not just the first one.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n';
  }
};

// <thead>/<tbody>/<tfoot>: transparent containers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
||||
|
||||
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it's the first child of the TABLE or the first TBODY (possibly
//   following a blank THEAD)
// - and every cell is a TH
//
// A missing row (for example `table.rows[0]` of a table without rows) or a
// row that has no parent is never a heading row; this returns false for it
// instead of throwing.
function isHeadingRow (tr) {
  var parentNode = tr && tr.parentNode;
  if (!parentNode) return false;

  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })
    )
  );
}
|
||||
|
||||
// True when `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is empty or whitespace only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
||||
|
||||
// Render one table cell: the first child of its parent opens the row with
// '| ', every other cell starts with a single space; all cells end in ' |'.
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |';
}
|
||||
|
||||
// Plugin: passes a predicate matching TABLE elements whose first row is not
// a heading row to `keep`, then registers every entry of `rules`.
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }
  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
||||
|
||||
// Plugin: registers a rule that renders a checkbox whose parent is an LI as
// '[x] ' when checked and '[ ] ' otherwise.
function taskListItems (turndownService) {
  var rule = {
    filter: function (node) {
      var isCheckbox = node.type === 'checkbox';
      return isCheckbox && node.parentNode.nodeName === 'LI';
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };

  turndownService.addRule('taskListItems', rule);
}
|
||||
|
||||
// Plugin bundle: applies all four plugins through a single `use` call.
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
|
||||
|
||||
exports.gfm = gfm;
|
||||
exports.highlightedCodeBlock = highlightedCodeBlock;
|
||||
exports.strikethrough = strikethrough;
|
||||
exports.tables = tables;
|
||||
exports.taskListItems = taskListItems;
|
154
node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.es.js
generated
vendored
Normal file
154
node_modules/turndown-plugin-gfm/lib/turndown-plugin-gfm.browser.es.js
generated
vendored
Normal file
|
@ -0,0 +1,154 @@
|
|||
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
|
||||
|
||||
// Plugin: registers a rule that turns a DIV whose class matches
// highlightRegExp and whose first child is a PRE into a fenced code block,
// using the captured class suffix as the fence's language tag.
function highlightedCodeBlock (turndownService) {
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var opening = '\n\n' + options.fence + language + '\n';
    var closing = '\n' + options.fence + '\n\n';
    // The PRE's raw text is used, not the already-converted `content`.
    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
||||
|
||||
// Plugin: registers a rule that wraps the content of <del>, <s> and
// <strike> elements in single tildes.
function strikethrough (turndownService) {
  var wrapInTildes = function (content) {
    return '~' + content + '~';
  };

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}
|
||||
|
||||
// Array helpers borrowed so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table conversion rules, keyed by the name they are registered under.
var rules = {};

// <th>/<td>: wrap the cell content in pipe delimiters.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// <tr>: emit the row on its own line. A heading row is followed by a
// delimiter row whose cells reflect each heading cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: a value such as align="constructor"
        // must not resolve to a member inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see the `tables` plugin).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines: collapse every run of two or more
    // newlines, not just the first one.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n';
  }
};

// <thead>/<tbody>/<tfoot>: transparent containers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
||||
|
||||
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it's the first child of the TABLE or the first TBODY (possibly
//   following a blank THEAD)
// - and every cell is a TH
//
// A missing row (for example `table.rows[0]` of a table without rows) or a
// row that has no parent is never a heading row; this returns false for it
// instead of throwing.
function isHeadingRow (tr) {
  var parentNode = tr && tr.parentNode;
  if (!parentNode) return false;

  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })
    )
  );
}
|
||||
|
||||
// True when `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is empty or whitespace only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
||||
|
||||
// Render one table cell: the first child of its parent opens the row with
// '| ', every other cell starts with a single space; all cells end in ' |'.
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |';
}
|
||||
|
||||
// Plugin: passes a predicate matching TABLE elements whose first row is not
// a heading row to `keep`, then registers every entry of `rules`.
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }
  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
||||
|
||||
// Plugin: registers a rule that renders a checkbox whose parent is an LI as
// '[x] ' when checked and '[ ] ' otherwise.
function taskListItems (turndownService) {
  var rule = {
    filter: function (node) {
      var isCheckbox = node.type === 'checkbox';
      return isCheckbox && node.parentNode.nodeName === 'LI';
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };

  turndownService.addRule('taskListItems', rule);
}
|
||||
|
||||
// Plugin bundle: applies all four plugins through a single `use` call.
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
|
||||
|
||||
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems };
|
|
@ -0,0 +1,162 @@
|
|||
'use strict';
|
||||
|
||||
Object.defineProperty(exports, '__esModule', { value: true });
|
||||
|
||||
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
|
||||
|
||||
// Plugin: registers a rule that turns a DIV whose class matches
// highlightRegExp and whose first child is a PRE into a fenced code block,
// using the captured class suffix as the fence's language tag.
function highlightedCodeBlock (turndownService) {
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var opening = '\n\n' + options.fence + language + '\n';
    var closing = '\n' + options.fence + '\n\n';
    // The PRE's raw text is used, not the already-converted `content`.
    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
||||
|
||||
// Plugin: registers a rule that wraps the content of <del>, <s> and
// <strike> elements in single tildes.
function strikethrough (turndownService) {
  var wrapInTildes = function (content) {
    return '~' + content + '~';
  };

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}
|
||||
|
||||
// Array helpers borrowed so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table conversion rules, keyed by the name they are registered under.
var rules = {};

// <th>/<td>: wrap the cell content in pipe delimiters.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// <tr>: emit the row on its own line. A heading row is followed by a
// delimiter row whose cells reflect each heading cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: a value such as align="constructor"
        // must not resolve to a member inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see the `tables` plugin).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines: collapse every run of two or more
    // newlines, not just the first one.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n';
  }
};

// <thead>/<tbody>/<tfoot>: transparent containers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
||||
|
||||
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it's the first child of the TABLE or the first TBODY (possibly
//   following a blank THEAD)
// - and every cell is a TH
//
// A missing row (for example `table.rows[0]` of a table without rows) or a
// row that has no parent is never a heading row; this returns false for it
// instead of throwing.
function isHeadingRow (tr) {
  var parentNode = tr && tr.parentNode;
  if (!parentNode) return false;

  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })
    )
  );
}
|
||||
|
||||
// True when `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is empty or whitespace only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
||||
|
||||
// Render one table cell: the first child of its parent opens the row with
// '| ', every other cell starts with a single space; all cells end in ' |'.
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |';
}
|
||||
|
||||
// Plugin: passes a predicate matching TABLE elements whose first row is not
// a heading row to `keep`, then registers every entry of `rules`.
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }
  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
||||
|
||||
// Plugin: registers a rule that renders a checkbox whose parent is an LI as
// '[x] ' when checked and '[ ] ' otherwise.
function taskListItems (turndownService) {
  var rule = {
    filter: function (node) {
      var isCheckbox = node.type === 'checkbox';
      return isCheckbox && node.parentNode.nodeName === 'LI';
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };

  turndownService.addRule('taskListItems', rule);
}
|
||||
|
||||
// Plugin bundle: applies all four plugins through a single `use` call.
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
|
||||
|
||||
exports.gfm = gfm;
|
||||
exports.highlightedCodeBlock = highlightedCodeBlock;
|
||||
exports.strikethrough = strikethrough;
|
||||
exports.tables = tables;
|
||||
exports.taskListItems = taskListItems;
|
|
@ -0,0 +1,154 @@
|
|||
var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;
|
||||
|
||||
// Plugin: registers a rule that turns a DIV whose class matches
// highlightRegExp and whose first child is a PRE into a fenced code block,
// using the captured class suffix as the fence's language tag.
function highlightedCodeBlock (turndownService) {
  function isHighlightWrapper (node) {
    var pre = node.firstChild;
    return (
      node.nodeName === 'DIV' &&
      highlightRegExp.test(node.className) &&
      pre &&
      pre.nodeName === 'PRE'
    );
  }

  function toFencedBlock (content, node, options) {
    var match = (node.className || '').match(highlightRegExp);
    var language = match ? match[1] : '';
    var opening = '\n\n' + options.fence + language + '\n';
    var closing = '\n' + options.fence + '\n\n';
    // The PRE's raw text is used, not the already-converted `content`.
    return opening + node.firstChild.textContent + closing;
  }

  turndownService.addRule('highlightedCodeBlock', {
    filter: isHighlightWrapper,
    replacement: toFencedBlock
  });
}
|
||||
|
||||
// Plugin: registers a rule that wraps the content of <del>, <s> and
// <strike> elements in single tildes.
function strikethrough (turndownService) {
  var wrapInTildes = function (content) {
    return '~' + content + '~';
  };

  turndownService.addRule('strikethrough', {
    filter: ['del', 's', 'strike'],
    replacement: wrapInTildes
  });
}
|
||||
|
||||
// Array helpers borrowed so they can be applied to array-like node lists.
var indexOf = Array.prototype.indexOf;
var every = Array.prototype.every;

// Table conversion rules, keyed by the name they are registered under.
var rules = {};

// <th>/<td>: wrap the cell content in pipe delimiters.
rules.tableCell = {
  filter: ['th', 'td'],
  replacement: function (content, node) {
    return cell(content, node);
  }
};

// <tr>: emit the row on its own line. A heading row is followed by a
// delimiter row whose cells reflect each heading cell's `align` attribute.
rules.tableRow = {
  filter: 'tr',
  replacement: function (content, node) {
    var borderCells = '';
    var alignMap = { left: ':--', right: '--:', center: ':-:' };

    if (isHeadingRow(node)) {
      for (var i = 0; i < node.childNodes.length; i++) {
        var border = '---';
        var align = (
          node.childNodes[i].getAttribute('align') || ''
        ).toLowerCase();

        // Own-property lookup only: a value such as align="constructor"
        // must not resolve to a member inherited from Object.prototype.
        if (Object.prototype.hasOwnProperty.call(alignMap, align)) {
          border = alignMap[align];
        }

        borderCells += cell(border, node.childNodes[i]);
      }
    }
    return '\n' + content + (borderCells ? '\n' + borderCells : '');
  }
};

rules.table = {
  // Only convert tables with a heading row.
  // Tables with no heading row are kept using `keep` (see the `tables` plugin).
  filter: function (node) {
    return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
  },

  replacement: function (content) {
    // Ensure there are no blank lines: collapse every run of two or more
    // newlines, not just the first one.
    content = content.replace(/\n{2,}/g, '\n');
    return '\n\n' + content + '\n\n';
  }
};

// <thead>/<tbody>/<tfoot>: transparent containers, content passes through.
rules.tableSection = {
  filter: ['thead', 'tbody', 'tfoot'],
  replacement: function (content) {
    return content;
  }
};
|
||||
|
||||
// A tr is a heading row if:
// - the parent is a THEAD
// - or if it's the first child of the TABLE or the first TBODY (possibly
//   following a blank THEAD)
// - and every cell is a TH
//
// A missing row (for example `table.rows[0]` of a table without rows) or a
// row that has no parent is never a heading row; this returns false for it
// instead of throwing.
function isHeadingRow (tr) {
  var parentNode = tr && tr.parentNode;
  if (!parentNode) return false;

  return (
    parentNode.nodeName === 'THEAD' ||
    (
      parentNode.firstChild === tr &&
      (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
      every.call(tr.childNodes, function (n) { return n.nodeName === 'TH'; })
    )
  );
}
|
||||
|
||||
// True when `element` is a TBODY that either has no previous sibling or
// directly follows a THEAD whose text content is empty or whitespace only.
function isFirstTbody (element) {
  if (element.nodeName !== 'TBODY') return false;

  var before = element.previousSibling;
  if (!before) return true;

  return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
}
|
||||
|
||||
// Render one table cell: the first child of its parent opens the row with
// '| ', every other cell starts with a single space; all cells end in ' |'.
function cell (content, node) {
  var siblings = node.parentNode.childNodes;
  var opensRow = indexOf.call(siblings, node) === 0;
  return (opensRow ? '| ' : ' ') + content + ' |';
}
|
||||
|
||||
// Plugin: passes a predicate matching TABLE elements whose first row is not
// a heading row to `keep`, then registers every entry of `rules`.
function tables (turndownService) {
  function isHeadlessTable (node) {
    return node.nodeName === 'TABLE' && !isHeadingRow(node.rows[0]);
  }
  turndownService.keep(isHeadlessTable);

  for (var ruleName in rules) {
    turndownService.addRule(ruleName, rules[ruleName]);
  }
}
|
||||
|
||||
// Plugin: registers a rule that renders a checkbox whose parent is an LI as
// '[x] ' when checked and '[ ] ' otherwise.
function taskListItems (turndownService) {
  var rule = {
    filter: function (node) {
      var isCheckbox = node.type === 'checkbox';
      return isCheckbox && node.parentNode.nodeName === 'LI';
    },
    replacement: function (content, node) {
      var box = node.checked ? '[x]' : '[ ]';
      return box + ' ';
    }
  };

  turndownService.addRule('taskListItems', rule);
}
|
||||
|
||||
// Plugin bundle: applies all four plugins through a single `use` call.
function gfm (turndownService) {
  var plugins = [highlightedCodeBlock, strikethrough, tables, taskListItems];
  turndownService.use(plugins);
}
|
||||
|
||||
export { gfm, highlightedCodeBlock, strikethrough, tables, taskListItems };
|
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"name": "turndown-plugin-gfm",
|
||||
"description": "Turndown plugin to add GitHub Flavored Markdown extensions.",
|
||||
"version": "1.0.2",
|
||||
"author": "Dom Christie",
|
||||
"main": "lib/turndown-plugin-gfm.cjs.js",
|
||||
"module": "lib/turndown-plugin-gfm.es.js",
|
||||
"jsnext:main": "lib/turndown-plugin-gfm.es.js",
|
||||
"devDependencies": {
|
||||
"browserify": "^14.5.0",
|
||||
"rollup": "^0.50.0",
|
||||
"standard": "^10.0.3",
|
||||
"turndown": "4.0.1",
|
||||
"turndown-attendant": "0.0.2"
|
||||
},
|
||||
"files": [
|
||||
"lib",
|
||||
"dist"
|
||||
],
|
||||
"keywords": [
|
||||
"turndown",
|
||||
"turndown-plugin",
|
||||
"html-to-markdown",
|
||||
"html",
|
||||
"markdown",
|
||||
"github-flavored-markdown",
|
||||
"gfm"
|
||||
],
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/domchristie/turndown-plugin-gfm.git"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "npm run build-cjs && npm run build-es && npm run build-iife && npm run build-test",
|
||||
"build-cjs": "rollup -c config/rollup.config.cjs.js && rollup -c config/rollup.config.browser.cjs.js",
|
||||
"build-es": "rollup -c config/rollup.config.es.js && rollup -c config/rollup.config.browser.es.js",
|
||||
"build-iife": "rollup -c config/rollup.config.iife.js",
|
||||
"build-test": "browserify test/turndown-plugin-gfm-test.js --outfile test/turndown-plugin-gfm-test.browser.js",
|
||||
"prepublish": "npm run build",
|
||||
"test": "npm run build && standard ./src/**/*.js && node test/turndown-plugin-gfm-test.js"
|
||||
}
|
||||
}
|
|
@ -1,6 +0,0 @@
|
|||
{
|
||||
"node" : true,
|
||||
"undef": true,
|
||||
"unused": true,
|
||||
"indent": 4
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
language: node_js
|
||||
node_js:
|
||||
- "0.6"
|
||||
- "0.8"
|
||||
- "0.10"
|
|
@ -1,20 +0,0 @@
|
|||
Copyright (c) 2013 Odysseas Tsatalos and oDesk Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@ -1,10 +0,0 @@
|
|||
TAP=node_modules/.bin/tap
|
||||
LINT=node_modules/.bin/jshint
|
||||
|
||||
test: lint
|
||||
$(TAP) test/*.js
|
||||
|
||||
lint:
|
||||
$(LINT) index.js
|
||||
$(LINT) test/*.js
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
URI validation functions
|
||||
==
|
||||
[![Build Status](https://travis-ci.org/ogt/valid-url.png)](https://travis-ci.org/ogt/valid-url)
|
||||
|
||||
## Synopsis
|
||||
|
||||
Common url validation methods
|
||||
```
|
||||
var validUrl = require('valid-url');
|
||||
|
||||
if (validUrl.isUri(suspect)){
|
||||
console.log('Looks like a URI');
|
||||
} else {
|
||||
console.log('Not a URI');
|
||||
}
|
||||
```
|
||||
|
||||
Replicates the functionality of Richard Sonnen's <sonnen@richardsonnen.com> Perl module:
|
||||
http://search.cpan.org/~sonnen/Data-Validate-URI-0.01/lib/Data/Validate/URI.pm [full code here](http://anonscm.debian.org/gitweb/?p=users/dom/libdata-validate-uri-perl.git)
|
||||
into a nodejs module. Translated practically line by line from perl.
|
||||
It passes all the original tests.
|
||||
|
||||
## Description
|
||||
|
||||
(copied from original perl module)
|
||||
|
||||
> This module collects common URI validation routines to make input validation, and untainting easier and more readable.
|
||||
> All functions return an untainted value if the test passes, and undef if it fails. This means that you should always check for a defined status explicitly. Don't assume the return will be true.
|
||||
> The value to test is always the first (and often only) argument.
|
||||
> There are a number of other URI validation modules out there as well (see below.) This one focuses on being fast, lightweight, and relatively 'real-world'. i.e. it's good if you want to check user input, and don't need to parse out the URI/URL into chunks.
|
||||
> Right now the module focuses on HTTP URIs, since they're arguably the most common. If you have a specialized scheme you'd like to have supported, let me know.
|
||||
|
||||
## Installation
|
||||
|
||||
```
|
||||
npm install valid-url
|
||||
```
|
||||
|
||||
## Methods
|
||||
```javascript
|
||||
/*
|
||||
* @Function isUri(value)
|
||||
*
|
||||
* @Synopsis is the value a well-formed uri?
|
||||
* @Description
|
||||
Returns the untainted URI if the test value appears to be well-formed. Note that
|
||||
you may really want one of the more practical methods like is_http_uri or is_https_uri,
|
||||
since the URI standard (RFC 3986) allows a lot of things you probably don't want.
|
||||
* @Arguments
|
||||
* value The potential URI to test.
|
||||
*
|
||||
* @Returns The untainted RFC 3986 URI on success, undefined on failure.
|
||||
* @Notes
|
||||
This function does not make any attempt to check whether the URI is accessible
|
||||
or 'makes sense' in any meaningful way. It just checks that it is formatted
|
||||
correctly.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* @Function isHttpUri(value)
|
||||
* @Synopsis is the value a well-formed HTTP uri?
|
||||
* @Description
|
||||
Specialized version of isUri() that only likes http:// urls. As a result, it can
|
||||
also do a much more thorough job validating. Also, unlike isUri() it is more
|
||||
concerned with only allowing real-world URIs through. Things like relative
|
||||
hostnames are allowed by the standards, but probably aren't wise. Conversely,
|
||||
null paths aren't allowed per RFC 2616 (should be '/' instead), but are allowed
|
||||
by this function.
|
||||
|
||||
This function only works for fully-qualified URIs. /bob.html won't work.
|
||||
See RFC 3986 for the appropriate method to turn a relative URI into an absolute
|
||||
one given its context.
|
||||
|
||||
Returns the untainted URI if the test value appears to be well-formed.
|
||||
|
||||
Note that you probably want to either call this in combination with isHttpsUri(), i.e.
|
||||
|
||||
if(isHttpUri(uri) || isHttpsUri(uri)) console.log('Good');
|
||||
|
||||
or use the convenience method isWebUri which is equivalent.
|
||||
|
||||
* @Arguments
|
||||
* value The potential URI to test.
|
||||
*
|
||||
* @Returns The untainted RFC 3986 URI on success, undefined on failure.
|
||||
* @Notes
|
||||
This function does not make any attempt to check whether the URI is accessible
|
||||
or 'makes sense' in any meaningful way. It just checks that it is formatted
|
||||
correctly.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* @Function isHttpsUri(value)
|
||||
* @Synopsis is the value a well-formed HTTPS uri?
|
||||
* @Description
|
||||
See is_http_uri() for details. This version only likes the https URI scheme.
|
||||
Otherwise it's identical to is_http_uri()
|
||||
* @Arguments
|
||||
* value The potential URI to test.
|
||||
*
|
||||
* @Returns The untainted RFC 3986 URI on success, undefined on failure.
|
||||
* @Notes
|
||||
This function does not make any attempt to check whether the URI is accessible
|
||||
or 'makes sense' in any meaningful way. It just checks that it is formatted
|
||||
correctly.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* @Function isWebUri(value)
|
||||
* @Synopsis is the value a well-formed HTTP or HTTPS uri?
|
||||
* @Description
|
||||
This is just a convenience method that combines isHttpUri and isHttpsUri
|
||||
to accept most common real-world URLs.
|
||||
* @Arguments
|
||||
* value The potential URI to test.
|
||||
*
|
||||
* @Returns The untainted RFC 3986 URI on success, undefined on failure.
|
||||
* @Notes
|
||||
This function does not make any attempt to check whether the URI is accessible
|
||||
or 'makes sense' in any meaningful way. It just checks that it is formatted
|
||||
correctly.
|
||||
*/
|
||||
|
||||
```
|
||||
|
||||
## See also
|
||||
|
||||
RFC 3986, RFC 3966, RFC 4694, RFC 4759, RFC 4904
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
(function(module) {
|
||||
'use strict';
|
||||
|
||||
module.exports.is_uri = is_iri;
|
||||
module.exports.is_http_uri = is_http_iri;
|
||||
module.exports.is_https_uri = is_https_iri;
|
||||
module.exports.is_web_uri = is_web_iri;
|
||||
// Create aliases
|
||||
module.exports.isUri = is_iri;
|
||||
module.exports.isHttpUri = is_http_iri;
|
||||
module.exports.isHttpsUri = is_https_iri;
|
||||
module.exports.isWebUri = is_web_iri;
|
||||
|
||||
|
||||
// Private helper: internal URI splitter, using the regular expression taken
// from RFC 3986. The returned match array carries scheme, authority, path,
// query and fragment in capture groups 1 to 5.
var splitUri = function(uri) {
  return uri.match(/(?:([^:\/?#]+):)?(?:\/\/([^\/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?/);
};
|
||||
|
||||
/**
 * Checks whether a value looks like a well-formed URI.
 *
 * The value is rejected when it is empty, is not a string, contains a
 * character outside the allowed set, contains an incomplete percent escape,
 * has no scheme, has a malformed scheme, or has a path that conflicts with
 * the presence or absence of an authority.
 *
 * @param {string} value - the potential URI to test
 * @returns {string|undefined} the URI re-assembled from its components on
 *   success, undefined on failure
 */
function is_iri(value) {
  if (!value) {
    return;
  }

  // Non-string input (numbers, arrays, ...) used to slip past the regex
  // checks below through implicit string coercion and then crash in
  // splitUri(); treat it as "not a URI" instead.
  if (typeof value !== 'string' && !(value instanceof String)) {
    return;
  }

  // check for illegal characters
  if (/[^a-z0-9\:\/\?\#\[\]\@\!\$\&\'\(\)\*\+\,\;\=\.\-\_\~\%]/i.test(value)) return;

  // check for hex escapes that aren't complete
  if (/%[^0-9a-f]/i.test(value)) return;
  if (/%[0-9a-f](?:[^0-9a-f]|$)/i.test(value)) return;

  // from RFC 3986
  var splitted = splitUri(value);
  var scheme = splitted[1];
  var authority = splitted[2];
  var path = splitted[3];
  var query = splitted[4];
  var fragment = splitted[5];
  var out = '';

  // a scheme is required; the path may be empty
  if (!scheme) return;

  if (authority && authority.length) {
    // if authority is present, the path must be empty or begin with a /
    if (!(path.length === 0 || /^\//.test(path))) return;
  } else {
    // if authority is not present, the path must not start with //
    if (/^\/\//.test(path)) return;
  }

  // scheme must begin with a letter, then consist of letters, digits, +, ., or -
  if (!/^[a-z][a-z0-9\+\-\.]*$/.test(scheme.toLowerCase())) return;

  // re-assemble the URL per section 5.3 in RFC 3986
  out += scheme + ':';
  if (authority && authority.length) {
    out += '//' + authority;
  }

  out += path;

  if (query && query.length) {
    out += '?' + query;
  }

  if (fragment && fragment.length) {
    out += '#' + fragment;
  }

  return out;
}
|
||||
|
||||
/**
 * Checks whether a value is a well-formed, fully-qualified HTTP (or HTTPS) URI.
 *
 * @param {string} value - the potential URI to test
 * @param {boolean} [allowHttps] - despite its name this flag selects the
 *   required scheme: when truthy the scheme must be "https", otherwise it
 *   must be "http"
 * @returns {string|undefined} the URI re-assembled from its components on
 *   success, undefined on failure
 */
function is_http_iri(value, allowHttps) {
  if (!is_iri(value)) {
    return;
  }

  // from RFC 3986
  var splitted = splitUri(value);
  var scheme = splitted[1];
  var authority = splitted[2];
  var path = splitted[3];
  var query = splitted[4];
  var fragment = splitted[5];
  var port = '';
  var out = '';

  if (!scheme) return;

  var requiredScheme = allowHttps ? 'https' : 'http';
  if (scheme.toLowerCase() !== requiredScheme) return;

  // fully-qualified URIs must have an authority section
  if (!authority) {
    return;
  }

  // enable port component: split a trailing ":<digits>" off the authority
  var portMatch = authority.match(/:(\d+)$/);
  if (portMatch) {
    port = portMatch[0];
    authority = authority.slice(0, portMatch.index);

    // An authority consisting of nothing but a port (e.g. "http://:80/")
    // has no host and is therefore not a usable HTTP URI.
    if (!authority) return;
  }

  out += scheme + ':';
  out += '//' + authority;

  if (port) {
    out += port;
  }

  out += path;

  if (query && query.length) {
    out += '?' + query;
  }

  if (fragment && fragment.length) {
    out += '#' + fragment;
  }

  return out;
}
|
||||
|
||||
/**
 * HTTPS-only variant of is_http_iri: delegates to it with the flag that makes
 * "https" the required scheme.
 *
 * @param {string} value - the potential URI to test
 * @returns {string|undefined} whatever is_http_iri returns for this value
 */
function is_https_iri(value) {
  var requireHttps = true;
  return is_http_iri(value, requireHttps);
}
|
||||
|
||||
/**
 * Accepts a value that is either a well-formed HTTP URI or a well-formed
 * HTTPS URI. The HTTP check runs first; the HTTPS check is consulted only
 * when the HTTP check yields a falsy result.
 *
 * @param {string} value - the potential URI to test
 * @returns {string|undefined} the result of the first check that succeeds,
 *   otherwise the (falsy) result of the HTTPS check
 */
function is_web_iri(value) {
  var asHttp = is_http_iri(value);
  if (asHttp) {
    return asHttp;
  }
  return is_https_iri(value);
}
|
||||
|
||||
})(module);
|
|
@ -1,24 +0,0 @@
|
|||
{
|
||||
"name": "valid-url",
|
||||
"description": "URI validation functions",
|
||||
"keywords": [
|
||||
"url",
|
||||
"validation",
|
||||
"check",
|
||||
"checker",
|
||||
"pattern"
|
||||
],
|
||||
"version": "1.0.9",
|
||||
"repository": {
|
||||
"url": "git://github.com/ogt/valid-url.git"
|
||||
},
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "make test"
|
||||
},
|
||||
"dependencies": {},
|
||||
"devDependencies": {
|
||||
"tap": "~0.4.3",
|
||||
"jshint": "~2.1.4"
|
||||
}
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
// tap test for is_http_uri: accepted URIs are asserted with t.ok, rejected
// ones with t.notOk. Each assertion message names the URI under test.
var test = require("tap").test;
var is_http_uri = require('../').is_http_uri;

test("testing is_http_uri", function (t) {
    var valid = [
        'http://www.richardsonnen.com/',
        'http://www.richardsonnen.com',
        'http://www.richardsonnen.com/foo/bar/test.html',
        'http://www.richardsonnen.com/?foo=bar',
        'http://www.richardsonnen.com:8080/test.html',
        'http://example.w3.org/path%20with%20spaces.html',
        'http://192.168.0.1/'
    ];

    var invalid = [
        '',
        'ftp://ftp.richardsonnen.com',
        'http:www.richardsonnen.com',
        'https://www.richardsonnen.com'
    ];

    // valid
    valid.forEach(function (uri) {
        t.ok(is_http_uri(uri), uri);
    });

    // invalid
    invalid.forEach(function (uri) {
        t.notOk(is_http_uri(uri), "bad: '" + uri + "'");
    });

    t.end();
});
|
|
@ -1,22 +0,0 @@
|
|||
// tap test for is_https_uri: accepted URIs are asserted with t.ok, rejected
// ones with t.notOk. Each assertion message names exactly the URI under test
// (two labels previously named http:// URLs although https:// URLs were
// checked, and one negative case had no "bad:" marker).
var test = require("tap").test;
var is_https_uri = require('../').is_https_uri;

test("testing is_https_uri", function (t) {
    var valid = [
        'https://www.richardsonnen.com/',
        'https://www.richardsonnen.com',
        'https://www.richardsonnen.com/foo/bar/test.html',
        'https://www.richardsonnen.com/?foo=bar',
        'https://www.richardsonnen.com:8080/test.html',
        'https://example.w3.org/path%20with%20spaces.html',
        'https://192.168.0.1/'
    ];

    var invalid = [
        '',
        'http://www.richardsonnen.com/',
        'ftp://ftp.richardsonnen.com',
        'https:www.richardsonnen.com'
    ];

    // valid
    valid.forEach(function (uri) {
        t.ok(is_https_uri(uri), uri);
    });

    // invalid
    invalid.forEach(function (uri) {
        t.notOk(is_https_uri(uri), "bad: '" + uri + "'");
    });

    t.end();
});
|
|
@ -1,35 +0,0 @@
|
|||
// tap test for is_uri. Accepted URIs are asserted with t.ok and labelled with
// the URI itself; rejected ones are asserted with t.notOk and labelled
// "bad: <uri>". Labels that named a different URI than the one under test, or
// that lacked the "bad:" marker, have been corrected.
var test = require("tap").test;
var is_uri = require('../').is_uri;

test("testing is_uri", function (t) {

    // valid - from RFC 3986 for the most part
    [
        'http://localhost/',
        'http://example.w3.org/path%20with%20spaces.html',
        'http://example.w3.org/%20',
        'ftp://ftp.is.co.za/rfc/rfc1808.txt',
        'ftp://ftp.is.co.za/../../../rfc/rfc1808.txt',
        'http://www.ietf.org/rfc/rfc2396.txt',
        'ldap://[2001:db8::7]/c=GB?objectClass?one',
        'mailto:John.Doe@example.com',
        'news:comp.infosystems.www.servers.unix',
        'tel:+1-816-555-1212',
        'telnet://192.0.2.16:80/',
        'urn:oasis:names:specification:docbook:dtd:xml:4.1.2'
    ].forEach(function (uri) {
        t.ok(is_uri(uri), uri);
    });

    // invalid
    t.notOk(is_uri(''), "bad: ''");
    t.notOk(is_uri('foo'), 'bad: foo');
    t.notOk(is_uri('foo@bar'), 'bad: foo@bar');
    t.notOk(is_uri('http://<foo>'), 'bad: http://<foo>'); // illegal characters
    t.notOk(is_uri('://bob/'), 'bad: ://bob/'); // empty schema
    t.notOk(is_uri('1http://bob'), 'bad: 1http://bob'); // bad schema
    t.notOk(is_uri('1http:////foo.html'), 'bad: 1http:////foo.html'); // bad path
    t.notOk(is_uri('http://example.w3.org/%illegal.html'), 'bad: http://example.w3.org/%illegal.html');
    t.notOk(is_uri('http://example.w3.org/%a'), 'bad: http://example.w3.org/%a'); // partial escape
    t.notOk(is_uri('http://example.w3.org/%a/foo'), 'bad: http://example.w3.org/%a/foo'); // partial escape
    t.notOk(is_uri('http://example.w3.org/%at'), 'bad: http://example.w3.org/%at'); // partial escape

    t.end();
});
|
|
@ -1,28 +0,0 @@
|
|||
// tap test for is_web_uri: both http:// and https:// URIs must be accepted,
// everything else rejected. Each assertion message names exactly the URI
// under test (the label for 'https:www.richardsonnen.com' previously named
// the http: variant).
var test = require("tap").test;
var is_web_uri = require('../').is_web_uri;

test("testing is_web_uri", function (t) {
    var valid = [
        'https://www.richardsonnen.com/',
        'https://www.richardsonnen.com',
        'https://www.richardsonnen.com/foo/bar/test.html',
        'https://www.richardsonnen.com/?foo=bar',
        'https://www.richardsonnen.com:8080/test.html',
        'http://www.richardsonnen.com/',
        'http://www.richardsonnen.com',
        'http://www.richardsonnen.com/foo/bar/test.html',
        'http://www.richardsonnen.com/?foo=bar',
        'http://www.richardsonnen.com:8080/test.html',
        'http://example.w3.org/path%20with%20spaces.html',
        'http://192.168.0.1/'
    ];

    var invalid = [
        '',
        'ftp://ftp.richardsonnen.com',
        'https:www.richardsonnen.com',
        'http:www.richardsonnen.com'
    ];

    // valid
    valid.forEach(function (uri) {
        t.ok(is_web_uri(uri), uri);
    });

    // invalid
    invalid.forEach(function (uri) {
        t.ok(!is_web_uri(uri), "bad: '" + uri + "'");
    });

    t.end();
});
|
|
@ -11,12 +11,13 @@
|
|||
"dependencies": {
|
||||
"@7c/validurl": "^0.0.3",
|
||||
"@mozilla/readability": "^0.3.0",
|
||||
"body-parser": "^1.19.1",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^6.0.5",
|
||||
"jsdom": "^16.4.0",
|
||||
"turndown": "^7.0.0",
|
||||
"url": "^0.11.0",
|
||||
"valid-url": "^1.0.9"
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"url": "^0.11.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@7c/validurl": {
|
||||
|
@ -1005,6 +1006,11 @@
|
|||
"domino": "^2.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
|
||||
},
|
||||
"node_modules/type-check": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",
|
||||
|
@ -1066,11 +1072,6 @@
|
|||
"node": ">= 0.4.0"
|
||||
}
|
||||
},
|
||||
"node_modules/valid-url": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/valid-url/-/valid-url-1.0.9.tgz",
|
||||
"integrity": "sha1-HBRHm0DxOXp1eC8RXkCGRHQzogA="
|
||||
},
|
||||
"node_modules/vary": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
|
@ -1912,6 +1913,11 @@
|
|||
"domino": "^2.1.6"
|
||||
}
|
||||
},
|
||||
"turndown-plugin-gfm": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown-plugin-gfm/-/turndown-plugin-gfm-1.0.2.tgz",
|
||||
"integrity": "sha512-vwz9tfvF7XN/jE0dGoBei3FXWuvll78ohzCZQuOb+ZjWrs3a0XhQVomJEb2Qh4VHTPNRO4GPZh0V7VRbiWwkRg=="
|
||||
},
|
||||
"type-check": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/type-check/-/type-check-0.3.2.tgz",
|
||||
|
@ -1960,11 +1966,6 @@
|
|||
"resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz",
|
||||
"integrity": "sha1-n5VxD1CiZ5R7LMwSR0HBAoQn5xM="
|
||||
},
|
||||
"valid-url": {
|
||||
"version": "1.0.9",
|
||||
"resolved": "https://registry.npmjs.org/valid-url/-/valid-url-1.0.9.tgz",
|
||||
"integrity": "sha1-HBRHm0DxOXp1eC8RXkCGRHQzogA="
|
||||
},
|
||||
"vary": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz",
|
||||
|
|
|
@ -6,10 +6,12 @@
|
|||
"dependencies": {
|
||||
"@7c/validurl": "^0.0.3",
|
||||
"@mozilla/readability": "^0.3.0",
|
||||
"body-parser": "^1.19.1",
|
||||
"express": "^4.17.1",
|
||||
"express-rate-limit": "^6.0.5",
|
||||
"jsdom": "^16.4.0",
|
||||
"turndown": "^7.0.0",
|
||||
"turndown-plugin-gfm": "^1.0.2",
|
||||
"url": "^0.11.0"
|
||||
},
|
||||
"scripts": {
|
||||
|
|
|
@ -17,16 +17,16 @@ module.exports = {
|
|||
}
|
||||
],
|
||||
|
||||
filter: function (url, data) {
|
||||
let domain = urlparser.parse(url).hostname
|
||||
for (let i=0;i<this.list.length;i++) {
|
||||
if (domain.match(this.list[i].domain)) {
|
||||
for (let j=0;j<this.list[i].remove.length; j++) {
|
||||
data = data.replace(this.list[i].remove[j],"");
|
||||
}
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
filter: function (url, data) {
|
||||
let domain = urlparser.parse(url).hostname
|
||||
for (let i=0;i<this.list.length;i++) {
|
||||
if (domain.match(this.list[i].domain)) {
|
||||
for (let j=0;j<this.list[i].remove.length; j++) {
|
||||
data = data.replace(this.list[i].remove[j],"");
|
||||
}
|
||||
}
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue