parse code blocks
parent
92b4790bd0
commit
536bbea0ea
47
index.js
47
index.js
|
@ -72,11 +72,11 @@ app.post('/', function(req, res) {
|
|||
res.status(400).send("Please provide a POST parameter called html");
|
||||
} else {
|
||||
try {
|
||||
let document = new JSDOM(html);
|
||||
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
||||
send_headers(res);
|
||||
res.send(markdown);
|
||||
} catch (error) {
|
||||
let document = new JSDOM(html);
|
||||
let markdown = process_dom(url, document, res, inline_title, ignore_links);
|
||||
send_headers(res);
|
||||
res.send(markdown);
|
||||
} catch (error) {
|
||||
res.status(400).send("Could not parse that document");
|
||||
}
|
||||
}
|
||||
|
@ -98,11 +98,12 @@ function process_dom(url, document, res, inline_title, ignore_links) {
|
|||
res.header("X-Title", encodeURIComponent(title.textContent));
|
||||
let reader = new Readability(document.window.document);
|
||||
let readable = reader.parse().content;
|
||||
let replacement = {placeholders:[], tables:[]}
|
||||
readable = format_tables(readable, replacement);
|
||||
let replacements = []
|
||||
readable = format_tables(readable, replacements);
|
||||
readable = format_code_blocks(readable, replacements);
|
||||
let markdown = service.turndown(readable);
|
||||
for (let i=0;i<replacement.placeholders.length;i++) {
|
||||
markdown = markdown.replace(replacement.placeholders[i], replacement.tables[i]);
|
||||
for (let i=0;i<replacements.length;i++) {
|
||||
markdown = markdown.replace(replacements[i].placeholder, replacements[i].replacement);
|
||||
}
|
||||
let result = (url) ? common_filters.filter(url, markdown, ignore_links) : markdown;
|
||||
if (inline_title && title) {
|
||||
|
@ -136,17 +137,41 @@ function read_apple_url(url, res, inline_title, ignore_links) {
|
|||
}
|
||||
|
||||
/**
 * Swap every <table> element in an HTML string for a placeholder paragraph
 * and record the Markdown that should later take the placeholder's place.
 *
 * Each table is converted with table_to_markdown.convert(), and one
 * { placeholder, replacement } entry per table is appended to `replacements`.
 * Entries already present in `replacements` are left untouched, so the same
 * array can be shared with other format_* passes.
 *
 * @param {string} html - HTML to scan for tables.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *        Array that receives one entry per table found (mutated in place).
 * @returns {string} The HTML with each table replaced by
 *          "<p>" + placeholder + "</p>".
 */
function format_tables(html, replacements) {
	const tables = html.match(/(<table[^>]*>(?:.|\n)*?<\/table>)/gi);
	if (!tables) {
		return html;
	}
	tables.forEach((table, t) => {
		const markdown = table_to_markdown.convert(table);
		// The random suffix keeps the token from colliding with real page text.
		const placeholder = "urltomarkdowntableplaceholder"+t+Math.random();
		// Only the unified entry shape is recorded: `replacements` is a plain
		// array, so writing to replacements.placeholders / replacements.tables
		// (the older two-array layout) would throw a TypeError.
		replacements.push({ placeholder: placeholder, replacement: markdown });
		// A string pattern replaces only the first occurrence, so identical
		// tables are consumed one at a time, in document order.
		html = html.replace(table, "<p>"+placeholder+"</p>");
	});
	return html;
}
|
||||
|
||||
/**
 * Swap every <pre> element in an HTML string for a placeholder paragraph and
 * record the fenced Markdown that should later take the placeholder's place.
 *
 * @param {string} html - HTML to scan for <pre> blocks.
 * @param {Array<{placeholder: string, replacement: string}>} replacements -
 *        Array that receives one entry per block found, written after any
 *        entries it already holds (mutated in place).
 * @returns {string} The HTML with each <pre> block replaced by
 *          "<p>" + placeholder + "</p>".
 */
function format_code_blocks(html, replacements) {
	// New entries are written starting at the current end of the array.
	const offset = replacements.length;
	const found = html.match(/(<pre[^>]*>(?:.|\n)*?<\/pre>)/gi);
	if (!found) {
		return html;
	}
	let output = html;
	found.forEach((block_html, index) => {
		const fenced = code_block_to_markdown(block_html);
		// The random suffix keeps the token from colliding with real page text.
		const token = `urltomarkdowncodeblockplaceholder${index}${Math.random()}`;
		replacements[offset + index] = { placeholder: token, replacement: fenced };
		// A string pattern replaces only the first remaining occurrence.
		output = output.replace(block_html, `<p>${token}</p>`);
	});
	return output;
}
|
||||
|
||||
/**
 * Convert the HTML of one <pre> element into a fenced Markdown code block.
 *
 * The outer <pre> wrapper and, when present, a single enclosing <code>
 * wrapper are removed. Because a fenced block shows its content verbatim,
 * remaining markup is flattened to plain text: <br> becomes a newline, other
 * tags and comments are dropped, and HTML character references are decoded.
 *
 * @param {string} html - Markup of a <pre> element. Input that is not wrapped
 *        in <pre> is treated as the block content itself instead of throwing.
 * @returns {string} fence + "\n" + code + "\n" + fence + "\n", where the fence
 *          is at least three backticks and longer than any backtick run in
 *          the code.
 */
function code_block_to_markdown (html) {
	const named_entities = { lt: "<", gt: ">", amp: "&", quot: "\"", apos: "'", nbsp: "\u00a0" };

	// Guard against a non-matching input: exec() returns null in that case.
	const match_pre = /^<pre[^>]*>([\s\S]*)<\/pre>$/i.exec(html);
	let inner_html = match_pre ? match_pre[1] : String(html);

	const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/i.exec(inner_html);
	if (match_code && match_code[1]) {
		inner_html = match_code[1];
	}

	// Tags are stripped BEFORE entities are decoded so that an escaped "&lt;div&gt;"
	// in the code sample survives as literal text.
	const code = inner_html
		.replace(/<br\s*\/?>/gi, "\n")
		.replace(/<!--[\s\S]*?-->/g, "")
		.replace(/<\/?[a-z][^>]*>/gi, "")
		// Single pass, so "&amp;lt;" decodes to "&lt;" and is not decoded twice.
		.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
			if (body[0] !== "#") {
				return Object.prototype.hasOwnProperty.call(named_entities, body)
					? named_entities[body]
					: entity;
			}
			const is_hex = body[1] === "x" || body[1] === "X";
			const code_point = parseInt(body.slice(is_hex ? 2 : 1), is_hex ? 16 : 10);
			// Leave references outside the Unicode range untouched.
			if (code_point === 0 || code_point > 0x10FFFF) {
				return entity;
			}
			return String.fromCodePoint(code_point);
		});

	// The fence must be longer than any backtick run inside the code,
	// otherwise that run would close the block early.
	const longest_run = (code.match(/`+/g) || [])
		.reduce((longest, run) => Math.max(longest, run.length), 0);
	const fence = "`".repeat(Math.max(3, longest_run + 1));

	return fence+"\n"+code+"\n"+fence+"\n";
}
|
||||
|
|
Loading…
Reference in New Issue