another attempt at parsing code blocks better

main
Lee Hanken 2022-11-22 11:37:33 +00:00
parent ca3226abf0
commit faf75dc6fb
2567 changed files with 23 additions and 1 deletions

0
html_table_to_markdown.js Normal file → Executable file
View File

View File

@ -8,6 +8,7 @@ const table_to_markdown = require('./html_table_to_markdown.js');
const validURL = require('@7c/validurl'); const validURL = require('@7c/validurl');
const express = require('express'); const express = require('express');
const rateLimit = require('express-rate-limit'); const rateLimit = require('express-rate-limit');
const htmlEntities = require('html-entities');
const port = process.env.PORT; const port = process.env.PORT;
@ -107,6 +108,7 @@ function process_dom(url, document, res, inline_title, ignore_links) {
let readable = reader.parse().content; let readable = reader.parse().content;
let replacements = [] let replacements = []
readable = format_tables(readable, replacements); readable = format_tables(readable, replacements);
readable = format_codeblocks(readable, replacements);
let markdown = service.turndown(readable); let markdown = service.turndown(readable);
for (let i=0;i<replacements.length;i++) { for (let i=0;i<replacements.length;i++) {
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement); markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
@ -147,7 +149,7 @@ function format_tables(html, replacements) {
const tables = html.match(/(<table[^>]*>(?:.|\n)*?<\/table>)/gi); const tables = html.match(/(<table[^>]*>(?:.|\n)*?<\/table>)/gi);
if (tables) { if (tables) {
for (let t=0;t<tables.length;t++) { for (let t=0;t<tables.length;t++) {
let table = tables[t]; const table = tables[t];
let markdown = table_to_markdown.convert(table); let markdown = table_to_markdown.convert(table);
let placeholder = "urltomarkdowntableplaceholder"+t+Math.random(); let placeholder = "urltomarkdowntableplaceholder"+t+Math.random();
replacements[start+t] = { placeholder: placeholder, replacement: markdown}; replacements[start+t] = { placeholder: placeholder, replacement: markdown};
@ -157,3 +159,23 @@ function format_tables(html, replacements) {
return html; return html;
} }
/**
 * Swap every <pre>…</pre> block in `html` for a unique placeholder paragraph
 * and record the fenced-Markdown rendering of that block in `replacements`.
 *
 * The caller is expected to convert the returned HTML to Markdown and then
 * substitute each placeholder with its recorded replacement, so the code text
 * is not touched by the HTML-to-Markdown conversion.
 *
 * @param {string} html - HTML fragment to scan.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *   Shared list of pending substitutions; new entries are appended after the
 *   ones already present. Mutated in place.
 * @returns {string} The HTML with each <pre> block replaced by
 *   `<p>placeholder</p>`.
 */
function format_codeblocks(html, replacements) {
	const start = replacements.length;
	const codeblocks = html.match(/(<pre[^>]*>(?:.|\n)*?<\/pre>)/gi);
	if (!codeblocks) {
		return html;
	}
	for (let c = 0; c < codeblocks.length; c++) {
		const codeblock = codeblocks[c];
		let filtered = codeblock;
		// Line breaks and paragraph starts become newlines. Matching is
		// case-insensitive to agree with the <pre> search above.
		filtered = filtered.replace(/<br[^>]*>/gi, "\n");
		// Require whitespace or '>' right after "<p" so that attributes are
		// accepted but "<pre ...>" (and other p-prefixed tags) are not hit.
		filtered = filtered.replace(/<p(?:\s[^>]*)?>/gi, "\n");
		// Drop every remaining tag (including an unterminated one at the end).
		filtered = filtered.replace(/<\/?[^>]+(>|$)/g, "");
		filtered = htmlEntities.decode(filtered);
		// The fence must be longer than any backtick run inside the code,
		// otherwise an embedded ``` would terminate the block early.
		const backtick_runs = filtered.match(/`+/g) || [];
		let longest_run = 0;
		for (let r = 0; r < backtick_runs.length; r++) {
			longest_run = Math.max(longest_run, backtick_runs[r].length);
		}
		const fence = "`".repeat(Math.max(3, longest_run + 1));
		const markdown = fence + "\n" + filtered + "\n" + fence + "\n";
		const placeholder = "urltomarkdowncodeblockplaceholder" + c + Math.random();
		replacements[start + c] = { placeholder: placeholder, replacement: markdown };
		// String pattern: replaces only the first remaining occurrence, which
		// is the block currently being processed.
		html = html.replace(codeblock, "<p>" + placeholder + "</p>");
	}
	return html;
}

0
license.txt Normal file → Executable file
View File

0
node_modules/.package-lock.json generated vendored Normal file → Executable file
View File

0
node_modules/@7c/validurl/README.md generated vendored Normal file → Executable file
View File

0
node_modules/@7c/validurl/package.json generated vendored Normal file → Executable file
View File

0
node_modules/@7c/validurl/tests/validURL.js generated vendored Normal file → Executable file
View File

0
node_modules/@7c/validurl/validURL.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/.eslintrc.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/CODE_OF_CONDUCT.md generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/JSDOMParser.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/README.md generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/Readability-readerable.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/Readability.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/index.js generated vendored Normal file → Executable file
View File

0
node_modules/@mozilla/readability/package.json generated vendored Normal file → Executable file
View File

0
node_modules/@tootallnate/once/dist/index.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/@tootallnate/once/dist/index.js generated vendored Normal file → Executable file
View File

0
node_modules/@tootallnate/once/dist/index.js.map generated vendored Normal file → Executable file
View File

0
node_modules/@tootallnate/once/package.json generated vendored Normal file → Executable file
View File

0
node_modules/abab/CHANGELOG.md generated vendored Normal file → Executable file
View File

0
node_modules/abab/LICENSE.md generated vendored Normal file → Executable file
View File

0
node_modules/abab/README.md generated vendored Normal file → Executable file
View File

0
node_modules/abab/index.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/abab/index.js generated vendored Normal file → Executable file
View File

0
node_modules/abab/lib/atob.js generated vendored Normal file → Executable file
View File

0
node_modules/abab/lib/btoa.js generated vendored Normal file → Executable file
View File

0
node_modules/abab/package.json generated vendored Normal file → Executable file
View File

0
node_modules/accepts/HISTORY.md generated vendored Normal file → Executable file
View File

0
node_modules/accepts/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/accepts/README.md generated vendored Normal file → Executable file
View File

0
node_modules/accepts/index.js generated vendored Normal file → Executable file
View File

0
node_modules/accepts/package.json generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/README.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/index.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/CHANGELOG.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/README.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.js.map generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/acorn.mjs.map generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/dist/bin.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/node_modules/acorn/package.json generated vendored Normal file → Executable file
View File

0
node_modules/acorn-globals/package.json generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/CHANGELOG.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/README.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/dist/walk.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/dist/walk.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/dist/walk.js.map generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/dist/walk.mjs generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/dist/walk.mjs.map generated vendored Normal file → Executable file
View File

0
node_modules/acorn-walk/package.json generated vendored Normal file → Executable file
View File

0
node_modules/acorn/CHANGELOG.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/acorn/README.md generated vendored Normal file → Executable file
View File

0
node_modules/acorn/dist/acorn.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/acorn/dist/acorn.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn/dist/acorn.mjs generated vendored Normal file → Executable file
View File

0
node_modules/acorn/dist/acorn.mjs.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/acorn/dist/bin.js generated vendored Normal file → Executable file
View File

0
node_modules/acorn/package.json generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/README.md generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/index.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/index.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/index.js.map generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/promisify.d.ts generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/promisify.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/dist/src/promisify.js.map generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/README.md generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/package.json generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/src/browser.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/src/common.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/src/index.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/debug/src/node.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/ms/index.js generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/ms/license.md generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/ms/package.json generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/node_modules/ms/readme.md generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/package.json generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/src/index.ts generated vendored Normal file → Executable file
View File

0
node_modules/agent-base/src/promisify.ts generated vendored Normal file → Executable file
View File

0
node_modules/array-flatten/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/array-flatten/README.md generated vendored Normal file → Executable file
View File

0
node_modules/array-flatten/array-flatten.js generated vendored Normal file → Executable file
View File

0
node_modules/array-flatten/package.json generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/LICENSE generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/README.md generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/bench.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/index.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/lib/abort.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/lib/async.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/lib/defer.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/lib/iterate.js generated vendored Normal file → Executable file
View File

0
node_modules/asynckit/lib/readable_asynckit.js generated vendored Normal file → Executable file
View File

Some files were not shown because too many files have changed in this diff Show More