parse code blocks

main
macsplit 2022-05-12 15:09:43 +01:00
parent 92b4790bd0
commit 536bbea0ea
1 changed file with 36 additions and 11 deletions

View File

@ -72,11 +72,11 @@ app.post('/', function(req, res) {
res.status(400).send("Please provide a POST parameter called html");
} else {
try {
let document = new JSDOM(html);
let markdown = process_dom(url, document, res, inline_title, ignore_links);
send_headers(res);
res.send(markdown);
} catch (error) {
let document = new JSDOM(html);
let markdown = process_dom(url, document, res, inline_title, ignore_links);
send_headers(res);
res.send(markdown);
} catch (error) {
res.status(400).send("Could not parse that document");
}
}
@ -98,11 +98,12 @@ function process_dom(url, document, res, inline_title, ignore_links) {
res.header("X-Title", encodeURIComponent(title.textContent));
let reader = new Readability(document.window.document);
let readable = reader.parse().content;
let replacement = {placeholders:[], tables:[]}
readable = format_tables(readable, replacement);
let replacements = []
readable = format_tables(readable, replacements);
readable = format_code_blocks(readable, replacements);
let markdown = service.turndown(readable);
for (let i=0;i<replacement.placeholders.length;i++) {
markdown = markdown.replace(replacement.placeholders[i], replacement.tables[i]);
for (let i=0;i<replacements.length;i++) {
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
}
let result = (url) ? common_filters.filter(url, markdown, ignore_links) : markdown;
if (inline_title && title) {
@ -136,17 +137,41 @@ function read_apple_url(url, res, inline_title, ignore_links) {
}
/**
 * Replace every <table> element found in an HTML string with a placeholder
 * paragraph, recording the converted table so it can be substituted back later.
 *
 * @param {string} html - HTML to scan for tables.
 * @param {Array<{placeholder: string, replacement: *}>} replacements - Shared
 *   list that is mutated in place: one entry is appended per table found.
 *   `replacement` holds whatever `table_to_markdown.convert` returned.
 * @returns {string} The HTML with each matched table swapped for
 *   `<p>placeholder</p>`; returned unchanged when no table matches.
 */
function format_tables(html, replacements) {
    // Entries are appended after whatever earlier passes already recorded.
    const start = replacements.length;
    const tables = html.match(/(<table[^>]*>(?:.|\n)*?<\/table>)/gi);
    if (tables) {
        for (let t = 0; t < tables.length; t++) {
            const table = tables[t];
            const markdown = table_to_markdown.convert(table);
            // The random suffix keeps the placeholder from colliding with page text.
            const placeholder = "urltomarkdowntableplaceholder" + t + Math.random();
            // Only the {placeholder, replacement} shape is written; the list is
            // a plain array and has no `placeholders` / `tables` properties.
            replacements[start + t] = { placeholder: placeholder, replacement: markdown };
            html = html.replace(table, "<p>" + placeholder + "</p>");
        }
    }
    return html;
}
/**
 * Swap each <pre> element in an HTML string for a placeholder paragraph and
 * record the converted block in the shared replacements list.
 *
 * @param {string} html - HTML to scan for <pre> elements.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *   Mutated in place; one entry is added per <pre> element found, after any
 *   entries already present.
 * @returns {string} The HTML with every matched <pre> element replaced by
 *   `<p>placeholder</p>`.
 */
function format_code_blocks(html, replacements) {
    const offset = replacements.length;
    const found = html.match(/(<pre[^>]*>(?:.|\n)*?<\/pre>)/gi) || [];
    found.forEach((block, index) => {
        const converted = code_block_to_markdown(block);
        // Random suffix makes an accidental clash with page text very unlikely.
        const token = `urltomarkdowncodeblockplaceholder${index}${Math.random()}`;
        replacements[offset + index] = { placeholder: token, replacement: converted };
        html = html.replace(block, `<p>${token}</p>`);
    });
    return html;
}
/**
 * Convert the HTML of a <pre> element into a fenced Markdown code block.
 *
 * The outer <pre> wrapper is removed, and if its content is a single <code>
 * element that wrapper (plus any newlines right after its opening tag) is
 * removed too. Input that is not a <pre> element is fenced as-is instead of
 * throwing.
 *
 * NOTE(review): markup and HTML entities inside the block (e.g. highlighting
 * <span>s, `&lt;`) are passed through unchanged — confirm whether they should
 * be stripped/decoded before fencing.
 *
 * @param {string} html - HTML of one <pre> element; surrounding whitespace is tolerated.
 * @returns {string} The block content between fence lines, ending in a newline.
 */
function code_block_to_markdown (html) {
    // No `g` flag: each regex is used once, so stateful matching is not wanted.
    const match_pre = /^\s*<pre[^>]*>([\s\S]*)<\/pre>\s*$/i.exec(html);
    let inner_html = match_pre ? match_pre[1] : html;
    const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/i.exec(inner_html);
    // Unwrap even when <code> is empty so its tags never leak into the output.
    if (match_code) {
        inner_html = match_code[1];
    }
    // The fence must be longer than any backtick run in the content, otherwise
    // the content could close the block early.
    const backtick_runs = inner_html.match(/`+/g) || [];
    const longest_run = backtick_runs.reduce((max, run) => Math.max(max, run.length), 0);
    const fence = "`".repeat(Math.max(3, longest_run + 1));
    return fence + "\n" + inner_html + "\n" + fence + "\n";
}