urltomarkdown/index.js

54 lines
1.4 KiB
JavaScript
Raw Normal View History

2022-01-08 17:05:05 +00:00
const https = require('https');
const turndown = require('turndown');
const { Readability } = require('@mozilla/readability');
const JSDOM = require('jsdom').JSDOM;
2022-01-13 14:40:21 +00:00
const common_filters = require('./url_to_markdown_common_filters');
const validURL = require('@7c/validurl');
2022-01-08 17:05:05 +00:00
// Shared HTML-to-Markdown converter used by read_url for every request.
// Declared with `const`: the bare assignment created an implicit global,
// which throws under strict mode and leaks into global scope otherwise.
const service = new turndown();

const rateLimit = require('express-rate-limit');

// Rate limiter: at most `max` requests per `windowMs` (30 s) window; requests
// over the limit are answered with `message`.
const rateLimiter = rateLimit({
  windowMs: 30 * 1000,
  max: 5,
  message: 'Rate limit exceeded',
  headers: true,
});

const express = require('express');
const app = express();
// Port is taken from the environment. NOTE(review): when PORT is unset this is
// `undefined` and the value handed to app.listen is undefined as well --
// confirm that is acceptable for local runs.
const port = process.env.PORT;

// Apply the rate limiter to every route registered after this point.
app.use(rateLimiter);
2022-01-08 17:05:05 +00:00
/**
 * GET / -- converts the page named by the `url` query parameter to Markdown.
 *
 * Sets permissive CORS headers, exposes the `X-Title` header to browsers and
 * declares the response as `text/markdown`. A usable URL is handed to
 * read_url, which sends the response; anything else gets a 400.
 */
app.get('/', (req, res) => {
  // `const` keeps the value local to this request; the original bare
  // assignment wrote to a global shared by all in-flight requests.
  const url = req.query.url;
  res.header("Access-Control-Allow-Origin", '*');
  res.header("Access-Control-Expose-Headers", 'X-Title');
  res.header("Content-Type", 'text/markdown');
  // Repeated (`?url=a&url=b`) or bracketed (`?url[x]=1`) parameters are parsed
  // into arrays/objects, so insist on a plain string before validating.
  if (typeof url === 'string' && validURL(url)) {
    read_url(url, res);
  } else {
    res.status(400).send("Please specify a valid url query parameter");
  }
});
// Start the HTTP server on `port`. The listening callback intentionally does
// nothing.
app.listen(port, function onListening() {
});
/**
 * Fetches `url`, extracts its main article and sends it as Markdown on `res`.
 *
 * When the page has a <title>, its text is sent URI-encoded in the `X-Title`
 * response header. The article HTML produced by Readability is converted with
 * the shared turndown `service` and passed through `common_filters.filter`
 * before being sent.
 *
 * Any failure (fetch error, no extractable article, conversion error) is
 * logged and answered with HTTP 400 and a generic message.
 *
 * @param {string} url - Address of the page to convert.
 * @param {object} res - Response object providing `header`, `status` and `send`.
 * @returns {Promise<void>} Promise for the whole fetch/convert/send chain;
 *   existing callers may ignore it.
 */
function read_url(url, res) {
  return JSDOM.fromURL(url).then((dom) => {
    const page = dom.window.document;

    const title = page.querySelector('title');
    if (title) {
      // Encoded so that non-ASCII titles are safe to carry in a header value.
      res.header("X-Title", encodeURIComponent(title.textContent));
    }

    const article = new Readability(page).parse();
    if (!article) {
      // parse() yields null when no article can be extracted; fail explicitly
      // instead of relying on a TypeError from reading `.content` of null.
      throw new Error('Readability could not extract an article');
    }

    const markdown = service.turndown(article.content);
    res.send(common_filters.filter(url, markdown));
  }).catch((error) => {
    // Keep the client-facing message generic, but do not lose the cause.
    console.error('read_url failed for %s:', url, error);
    res.status(400).send("Sorry, could not fetch and convert that URL");
  });
}