another attempt at parsing code blocks better
parent
ca3226abf0
commit
faf75dc6fb
24
index.js
24
index.js
|
@ -8,6 +8,7 @@ const table_to_markdown = require('./html_table_to_markdown.js');
|
|||
const validURL = require('@7c/validurl');
|
||||
const express = require('express');
|
||||
const rateLimit = require('express-rate-limit');
|
||||
const htmlEntities = require('html-entities');
|
||||
|
||||
const port = process.env.PORT;
|
||||
|
||||
|
@ -107,6 +108,7 @@ function process_dom(url, document, res, inline_title, ignore_links) {
|
|||
let readable = reader.parse().content;
|
||||
let replacements = []
|
||||
readable = format_tables(readable, replacements);
|
||||
readable = format_codeblocks(readable, replacements);
|
||||
let markdown = service.turndown(readable);
|
||||
for (let i=0;i<replacements.length;i++) {
|
||||
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
|
||||
|
@ -147,7 +149,7 @@ function format_tables(html, replacements) {
|
|||
const tables = html.match(/(<table[^>]*>(?:.|\n)*?<\/table>)/gi);
|
||||
if (tables) {
|
||||
for (let t=0;t<tables.length;t++) {
|
||||
let table = tables[t];
|
||||
const table = tables[t];
|
||||
let markdown = table_to_markdown.convert(table);
|
||||
let placeholder = "urltomarkdowntableplaceholder"+t+Math.random();
|
||||
replacements[start+t] = { placeholder: placeholder, replacement: markdown};
|
||||
|
@ -157,3 +159,23 @@ function format_tables(html, replacements) {
|
|||
return html;
|
||||
}
|
||||
|
||||
/**
 * Replace every <pre>…</pre> element in `html` with a placeholder paragraph
 * and record the fenced-Markdown rendering of its text content.
 *
 * For each block: <br> and opening <p> tags become newlines, all remaining
 * tags are stripped, HTML entities are decoded, and the result is wrapped in
 * a backtick fence. One `{ placeholder, replacement }` entry is appended to
 * `replacements` per block, in document order.
 *
 * @param {string} html - HTML to scan.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *   Array that receives the new entries; existing entries are left untouched.
 * @returns {string} `html` with each <pre> block swapped for
 *   `<p>placeholder</p>`.
 */
function format_codeblocks(html, replacements) {
	// [\s\S] (rather than `.|\n`) so blocks containing "\r" are matched too.
	const codeblocks = html.match(/(<pre[^>]*>[\s\S]*?<\/pre>)/gi);
	if (!codeblocks) {
		return html;
	}

	for (let c = 0; c < codeblocks.length; c++) {
		const codeblock = codeblocks[c];

		let filtered = codeblock;
		// Line-breaking tags become newlines before the generic tag strip.
		// Case-insensitive to stay consistent with the <pre> match above.
		filtered = filtered.replace(/<br[^>]*>/gi, "\n");
		// Require whitespace or ">" after "<p" so <pre>/<param>/... don't match.
		filtered = filtered.replace(/<p(?:\s[^>]*)?>/gi, "\n");
		// Strip tags while entities are still encoded, so an encoded "&lt;"
		// in the code is not mistaken for markup.
		filtered = filtered.replace(/<\/?[^>]+(>|$)/g, "");
		filtered = htmlEntities.decode(filtered);

		// Use a fence longer than any backtick run in the code so that the
		// code cannot terminate the block early.
		const backtickRuns = filtered.match(/`+/g) || [];
		let longestRun = 0;
		for (const run of backtickRuns) {
			longestRun = Math.max(longestRun, run.length);
		}
		const fence = "`".repeat(Math.max(3, longestRun + 1));

		const markdown = fence + "\n" + filtered + "\n" + fence + "\n";
		const placeholder = "urltomarkdowncodeblockplaceholder" + c + Math.random();
		replacements.push({ placeholder: placeholder, replacement: markdown });

		// Function replacer: the inserted text is taken literally.
		html = html.replace(codeblock, () => "<p>" + placeholder + "</p>");
	}

	return html;
}
|
||||
|
||||
|
|
0
node_modules/@mozilla/readability/CODE_OF_CONDUCT.md
generated
vendored
Normal file → Executable file
0
node_modules/@mozilla/readability/CODE_OF_CONDUCT.md
generated
vendored
Normal file → Executable file
0
node_modules/@mozilla/readability/Readability-readerable.js
generated
vendored
Normal file → Executable file
0
node_modules/@mozilla/readability/Readability-readerable.js
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/CHANGELOG.md
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/CHANGELOG.md
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/LICENSE
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/LICENSE
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/README.md
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/README.md
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.d.ts
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.d.ts
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js.map
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js.map
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.d.ts
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.d.ts
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.map
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.map
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/bin.js
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/dist/bin.js
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/package.json
generated
vendored
Normal file → Executable file
0
node_modules/acorn-globals/node_modules/acorn/package.json
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/dist/src/promisify.js.map
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/dist/src/promisify.js.map
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/LICENSE
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/LICENSE
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/README.md
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/README.md
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/package.json
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/package.json
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/browser.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/browser.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/common.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/common.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/index.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/index.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/node.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/debug/src/node.js
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/license.md
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/license.md
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/package.json
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/package.json
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/readme.md
generated
vendored
Normal file → Executable file
0
node_modules/agent-base/node_modules/ms/readme.md
generated
vendored
Normal file → Executable file
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue