Roll back the code-block enhancement due to downstream issues with ToMarkdown
parent
4c66e91b73
commit
b73a9cef1b
39
index.js
39
index.js
|
@ -8,7 +8,6 @@ const table_to_markdown = require('./html_table_to_markdown.js');
|
||||||
const validURL = require('@7c/validurl');
|
const validURL = require('@7c/validurl');
|
||||||
const express = require('express');
|
const express = require('express');
|
||||||
const rateLimit = require('express-rate-limit');
|
const rateLimit = require('express-rate-limit');
|
||||||
const htmlentities = require('html-entities');
|
|
||||||
|
|
||||||
const port = process.env.PORT;
|
const port = process.env.PORT;
|
||||||
|
|
||||||
|
@ -72,14 +71,14 @@ app.post('/', function(req, res) {
|
||||||
if (!html) {
|
if (!html) {
|
||||||
res.status(400).send("Please provide a POST parameter called html");
|
res.status(400).send("Please provide a POST parameter called html");
|
||||||
} else {
|
} else {
|
||||||
//try {
|
try {
|
||||||
let document = new JSDOM(html);
|
let document = new JSDOM(html);
|
||||||
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
||||||
send_headers(res);
|
send_headers(res);
|
||||||
res.send(markdown);
|
res.send(markdown);
|
||||||
//} catch (error) {
|
} catch (error) {
|
||||||
// res.status(400).send("Could not parse that document");
|
res.status(400).send("Could not parse that document");
|
||||||
//}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
});
|
});
|
||||||
|
@ -100,8 +99,7 @@ function process_dom(url, document, res, inline_title, ignore_links) {
|
||||||
let reader = new Readability(document.window.document);
|
let reader = new Readability(document.window.document);
|
||||||
let readable = reader.parse().content;
|
let readable = reader.parse().content;
|
||||||
let replacements = []
|
let replacements = []
|
||||||
readable = format_tables(readable, replacements);
|
readable = format_tables(readable, replacements);
|
||||||
readable = format_code_blocks(readable, replacements);
|
|
||||||
let markdown = service.turndown(readable);
|
let markdown = service.turndown(readable);
|
||||||
for (let i=0;i<replacements.length;i++) {
|
for (let i=0;i<replacements.length;i++) {
|
||||||
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
|
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
|
||||||
|
@ -152,30 +150,3 @@ function format_tables(html, replacements) {
|
||||||
return html;
|
return html;
|
||||||
}
|
}
|
||||||
|
|
||||||
function format_code_blocks(html, replacements) {
|
|
||||||
const start = replacements.length;
|
|
||||||
const code_blocks = html.match(/(<pre[^>]*>(?:.|\n)*?<\/pre>)/gi);
|
|
||||||
if (code_blocks) {
|
|
||||||
for (let cb=0;cb<code_blocks.length;cb++) {
|
|
||||||
let code_block = code_blocks[cb];
|
|
||||||
let markdown = code_block_to_markdown(code_block);
|
|
||||||
let placeholder = "urltomarkdowncodeblockplaceholder"+cb+Math.random();
|
|
||||||
replacements[start+cb] = { placeholder: placeholder, replacement: markdown};
|
|
||||||
html = html.replace(code_block, "<p>"+placeholder+"</p>");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return html;
|
|
||||||
}
|
|
||||||
|
|
||||||
function code_block_to_markdown (html) {
|
|
||||||
const match_pre = /^<pre[^>]*>([\s\S]*)<\/pre>$/ig.exec(html);
|
|
||||||
let inner_html = match_pre[1];
|
|
||||||
const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/ig.exec(inner_html);
|
|
||||||
if (match_code && match_code[1])
|
|
||||||
inner_html = match_code[1];
|
|
||||||
inner_html = inner_html.replaceAll(/<br[^>]*>/ig,"\n");
|
|
||||||
inner_html = inner_html.replaceAll(/<[^>]+>/ig, "");
|
|
||||||
inner_html = htmlentities.decode(inner_html);
|
|
||||||
const markdown = "```\n"+inner_html+"\n```\n";
|
|
||||||
return markdown;
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue