parse code blocks
parent
92b4790bd0
commit
536bbea0ea
47
index.js
47
index.js
|
@ -72,11 +72,11 @@ app.post('/', function(req, res) {
|
||||||
res.status(400).send("Please provide a POST parameter called html");
|
res.status(400).send("Please provide a POST parameter called html");
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
let document = new JSDOM(html);
|
let document = new JSDOM(html);
|
||||||
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
||||||
send_headers(res);
|
send_headers(res);
|
||||||
res.send(markdown);
|
res.send(markdown);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
res.status(400).send("Could not parse that document");
|
res.status(400).send("Could not parse that document");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -98,11 +98,12 @@ function process_dom(url, document, res, inline_title, ignore_links) {
|
||||||
res.header("X-Title", encodeURIComponent(title.textContent));
|
res.header("X-Title", encodeURIComponent(title.textContent));
|
||||||
let reader = new Readability(document.window.document);
|
let reader = new Readability(document.window.document);
|
||||||
let readable = reader.parse().content;
|
let readable = reader.parse().content;
|
||||||
let replacement = {placeholders:[], tables:[]}
|
let replacements = []
|
||||||
readable = format_tables(readable, replacement);
|
readable = format_tables(readable, replacements);
|
||||||
|
readable = format_code_blocks(readable, replacements);
|
||||||
let markdown = service.turndown(readable);
|
let markdown = service.turndown(readable);
|
||||||
for (let i=0;i<replacement.placeholders.length;i++) {
|
for (let i=0;i<replacements.length;i++) {
|
||||||
markdown = markdown.replace(replacement.placeholders[i], replacement.tables[i]);
|
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
|
||||||
}
|
}
|
||||||
let result = (url) ? common_filters.filter(url, markdown, ignore_links) : markdown;
|
let result = (url) ? common_filters.filter(url, markdown, ignore_links) : markdown;
|
||||||
if (inline_title && title) {
|
if (inline_title && title) {
|
||||||
|
@ -136,17 +137,41 @@ function read_apple_url(url, res, inline_title, ignore_links) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Replace every <table>…</table> in an HTML string with a placeholder
 * paragraph and record the Markdown rendering of each table.
 *
 * One `{ placeholder, replacement }` entry is appended to `replacements` per
 * table found, after any entries that are already present.
 *
 * @param {string} html - HTML to scan.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *   Array that receives one entry per table (mutated in place).
 * @returns {string} The HTML with each matched table swapped for
 *   "<p>" + placeholder + "</p>".
 */
function format_tables(html, replacements) {
    // [\s\S] instead of (?:.|\n): "." does not match \r, \u2028 or \u2029,
    // so a table containing any of those was never picked up.
    const tables = html.match(/<table[^>]*>[\s\S]*?<\/table>/gi);
    if (!tables) {
        return html;
    }
    for (let t = 0; t < tables.length; t++) {
        const table = tables[t];
        const markdown = table_to_markdown.convert(table);
        // Random suffix keeps the placeholder from colliding with page text.
        const placeholder = "urltomarkdowntableplaceholder" + t + Math.random();
        replacements.push({ placeholder: placeholder, replacement: markdown });
        // String pattern: replaces the first remaining occurrence literally.
        html = html.replace(table, "<p>" + placeholder + "</p>");
    }
    return html;
}
|
||||||
|
|
||||||
|
/**
 * Replace every <pre>…</pre> in an HTML string with a placeholder paragraph
 * and record the fenced-Markdown rendering of each block.
 *
 * One `{ placeholder, replacement }` entry is appended to `replacements` per
 * block found, after any entries that are already present.
 *
 * @param {string} html - HTML to scan.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *   Array that receives one entry per code block (mutated in place).
 * @returns {string} The HTML with each matched block swapped for
 *   "<p>" + placeholder + "</p>".
 */
function format_code_blocks(html, replacements) {
    // [\s\S] instead of (?:.|\n): "." does not match \r, \u2028 or \u2029,
    // so a <pre> containing any of those was never picked up.
    const code_blocks = html.match(/<pre[^>]*>[\s\S]*?<\/pre>/gi);
    if (!code_blocks) {
        return html;
    }
    for (let cb = 0; cb < code_blocks.length; cb++) {
        const code_block = code_blocks[cb];
        const markdown = code_block_to_markdown(code_block);
        // Random suffix keeps the placeholder from colliding with page text.
        const placeholder = "urltomarkdowncodeblockplaceholder" + cb + Math.random();
        replacements.push({ placeholder: placeholder, replacement: markdown });
        // String pattern: replaces the first remaining occurrence literally.
        html = html.replace(code_block, "<p>" + placeholder + "</p>");
    }
    return html;
}
|
||||||
|
|
||||||
|
/**
 * Convert one serialized <pre>…</pre> element into a fenced Markdown code
 * block.
 *
 * The outer <pre> wrapper and, when present, a <code> wrapper spanning its
 * whole content are removed. Any remaining markup is stripped and character
 * references are decoded once, so the fence contains plain source text
 * rather than HTML.
 *
 * NOTE(review): the returned text can contain "$". A caller that splices it
 * in with String.prototype.replace and a *string* replacement will have
 * sequences such as "$&" or "$1" expanded; a replacer function avoids that.
 *
 * @param {string} html - Markup of a single <pre> element.
 * @returns {string} "```\n" + code text + "\n```\n".
 */
function code_block_to_markdown (html) {
    const named_entities = new Map([
        ["lt", "<"],
        ["gt", ">"],
        ["amp", "&"],
        ["quot", "\""],
        ["apos", "'"],
        ["nbsp", "\u00a0"],
    ]);

    // No "g" flag: each literal is used for a single exec call.
    const match_pre = /^<pre[^>]*>([\s\S]*)<\/pre>$/i.exec(html);
    // Fall back to the raw input instead of throwing on match_pre[1] when the
    // argument is not exactly a <pre>…</pre> string.
    let inner_html = match_pre ? match_pre[1] : html;

    const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/i.exec(inner_html);
    // Unwrap even when the captured body is empty, so "<code></code>" is not
    // left inside the fence.
    if (match_code) {
        inner_html = match_code[1];
    }

    const text = inner_html
        .replace(/<br\s*\/?>/gi, "\n")
        // Drop leftover tags (e.g. syntax-highlighting spans) before decoding,
        // so decoded "<" and ">" characters are never mistaken for markup.
        .replace(/<[^>]+>/g, "")
        // Single pass: "&amp;lt;" becomes "&lt;", not "<".
        .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z]+);/g, function (entity, body) {
            if (body[0] !== "#") {
                return named_entities.has(body) ? named_entities.get(body) : entity;
            }
            const is_hex = body[1] === "x" || body[1] === "X";
            const code_point = Number.parseInt(body.slice(is_hex ? 2 : 1), is_hex ? 16 : 10);
            // Leave out-of-range references untouched; fromCodePoint would throw.
            return code_point > 0 && code_point <= 0x10ffff
                ? String.fromCodePoint(code_point)
                : entity;
        });

    return "```\n" + text + "\n```\n";
}
|
||||||
|
|
Loading…
Reference in New Issue