From 536bbea0ea5003a0acf2023fe0233a1c88f34c94 Mon Sep 17 00:00:00 2001
From: macsplit
Date: Thu, 12 May 2022 15:09:43 +0100
Subject: [PATCH] parse code blocks
---
index.js | 47 ++++++++++++++++++++++++++++++++++++-----------
1 file changed, 36 insertions(+), 11 deletions(-)
diff --git a/index.js b/index.js
index a8c8af5..92c42b3 100755
--- a/index.js
+++ b/index.js
@@ -72,11 +72,11 @@ app.post('/', function(req, res) {
res.status(400).send("Please provide a POST parameter called html");
} else {
try {
- let document = new JSDOM(html);
- let markdown = process_dom(url, document, res, inline_title, ignore_links);
- send_headers(res);
- res.send(markdown);
- } catch (error) {
+ let document = new JSDOM(html);
+ let markdown = process_dom(url, document, res, inline_title, ignore_links);
+ send_headers(res);
+ res.send(markdown);
+ } catch (error) {
res.status(400).send("Could not parse that document");
}
}
@@ -98,11 +98,12 @@ function process_dom(url, document, res, inline_title, ignore_links) {
res.header("X-Title", encodeURIComponent(title.textContent));
let reader = new Readability(document.window.document);
let readable = reader.parse().content;
- let replacement = {placeholders:[], tables:[]}
- readable = format_tables(readable, replacement);
+ let replacements = []
+ readable = format_tables(readable, replacements);
+ readable = format_code_blocks(readable, replacements);
let markdown = service.turndown(readable);
- for (let i=0;i]*>(?:.|\n)*?<\/table>)/gi);
if (tables) {
for (let t=0;t"+placeholder+"
");
}
}
return html;
}
+function format_code_blocks(html, replacements) {
+ const start = replacements.length;
+ const code_blocks = html.match(/(<pre[^>]*>(?:.|\n)*?<\/pre>)/gi);
+ if (code_blocks) {
+ for (let cb=0;cb"+placeholder+"");
+ }
+ }
+ return html;
+}
+
+function code_block_to_markdown (html) {
+ const match_pre = /^<pre[^>]*>([\s\S]*)<\/pre>$/ig.exec(html);
+ let inner_html = match_pre[1];
+ const match_code = /^\s*<code[^>]*>[\r\n]*([\s\S]*)<\/code>\s*$/ig.exec(inner_html);
+ if (match_code && match_code[1])
+ inner_html = match_code[1];
+ const markdown = "```\n"+inner_html+"\n```\n";
+ return markdown;
+}