From b73a9cef1b8661f490f70422c59bb0577f6c1553 Mon Sep 17 00:00:00 2001 From: macsplit Date: Fri, 13 May 2022 17:47:42 +0100 Subject: [PATCH] rollback code-block enhancement due to issues downstream with ToMarkdown --- index.js | 39 +++++---------------------------------- 1 file changed, 5 insertions(+), 34 deletions(-) diff --git a/index.js b/index.js index 9c6f9da..fa0174b 100755 --- a/index.js +++ b/index.js @@ -8,7 +8,6 @@ const table_to_markdown = require('./html_table_to_markdown.js'); const validURL = require('@7c/validurl'); const express = require('express'); const rateLimit = require('express-rate-limit'); -const htmlentities = require('html-entities'); const port = process.env.PORT; @@ -72,14 +71,14 @@ app.post('/', function(req, res) { if (!html) { res.status(400).send("Please provide a POST parameter called html"); } else { - //try { + try { let document = new JSDOM(html); let markdown = process_dom(url, document, res, inline_title, ignore_links); send_headers(res); res.send(markdown); - //} catch (error) { - // res.status(400).send("Could not parse that document"); - //} + } catch (error) { + res.status(400).send("Could not parse that document"); + } } }); @@ -100,8 +99,7 @@ function process_dom(url, document, res, inline_title, ignore_links) { let reader = new Readability(document.window.document); let readable = reader.parse().content; let replacements = [] - readable = format_tables(readable, replacements); - readable = format_code_blocks(readable, replacements); + readable = format_tables(readable, replacements); let markdown = service.turndown(readable); for (let i=0;i]*>(?:.|\n)*?<\/pre>)/gi); - if (code_blocks) { - for (let cb=0;cb"+placeholder+"

"); - } - } - return html; -} - -function code_block_to_markdown (html) { - const match_pre = /^]*>([\s\S]*)<\/pre>$/ig.exec(html); - let inner_html = match_pre[1]; - const match_code = /^\s*]*>[\r\n]*([\s\S]*)<\/code>\s*$/ig.exec(inner_html); - if (match_code && match_code[1]) - inner_html = match_code[1]; - inner_html = inner_html.replaceAll(/]*>/ig,"\n"); - inner_html = inner_html.replaceAll(/<[^>]+>/ig, ""); - inner_html = htmlentities.decode(inner_html); - const markdown = "```\n"+inner_html+"\n```\n"; - return markdown; -}