From b73a9cef1b8661f490f70422c59bb0577f6c1553 Mon Sep 17 00:00:00 2001
From: macsplit
Date: Fri, 13 May 2022 17:47:42 +0100
Subject: [PATCH] Roll back code-block enhancement due to issues downstream with
 ToMarkdown
---
index.js | 39 +++++----------------------------------
1 file changed, 5 insertions(+), 34 deletions(-)
diff --git a/index.js b/index.js
index 9c6f9da..fa0174b 100755
--- a/index.js
+++ b/index.js
@@ -8,7 +8,6 @@ const table_to_markdown = require('./html_table_to_markdown.js');
const validURL = require('@7c/validurl');
const express = require('express');
const rateLimit = require('express-rate-limit');
-const htmlentities = require('html-entities');
const port = process.env.PORT;
@@ -72,14 +71,14 @@ app.post('/', function(req, res) {
if (!html) {
res.status(400).send("Please provide a POST parameter called html");
} else {
- //try {
+ try {
let document = new JSDOM(html);
let markdown = process_dom(url, document, res, inline_title, ignore_links);
send_headers(res);
res.send(markdown);
- //} catch (error) {
- // res.status(400).send("Could not parse that document");
- //}
+ } catch (error) {
+ res.status(400).send("Could not parse that document");
+ }
}
});
@@ -100,8 +99,7 @@ function process_dom(url, document, res, inline_title, ignore_links) {
let reader = new Readability(document.window.document);
let readable = reader.parse().content;
let replacements = []
- readable = format_tables(readable, replacements);
- readable = format_code_blocks(readable, replacements);
+ readable = format_tables(readable, replacements);
let markdown = service.turndown(readable);
for (let i=0;i]*>(?:.|\n)*?<\/pre>)/gi);
- if (code_blocks) {
- for (let cb=0;cb"+placeholder+"
");
- }
- }
- return html;
-}
-
-function code_block_to_markdown (html) {
- const match_pre = /^]*>([\s\S]*)<\/pre>$/ig.exec(html);
- let inner_html = match_pre[1];
- const match_code = /^\s*]*>[\r\n]*([\s\S]*)<\/code>\s*$/ig.exec(inner_html);
- if (match_code && match_code[1])
- inner_html = match_code[1];
- inner_html = inner_html.replaceAll(/
]*>/ig,"\n");
- inner_html = inner_html.replaceAll(/<[^>]+>/ig, "");
- inner_html = htmlentities.decode(inner_html);
- const markdown = "```\n"+inner_html+"\n```\n";
- return markdown;
-}