Add per-domain regexp post-filtering
							parent
							
								
									3e4cbbf077
								
							
						
					
					
						commit
						07fb74e0b2
					
				
							
								
								
									
										4
									
								
								index.js
								
								
								
								
							
							
						
						
									
										4
									
								
								index.js
								
								
								
								
							|  | @ -2,6 +2,7 @@ const https = require('https'); | ||||||
| const turndown = require('turndown'); | const turndown = require('turndown'); | ||||||
| const { Readability } = require('@mozilla/readability'); | const { Readability } = require('@mozilla/readability'); | ||||||
| const JSDOM = require('jsdom').JSDOM; | const JSDOM = require('jsdom').JSDOM; | ||||||
|  | const common_filters = require('./url_to_markdown_common_filters'); | ||||||
| 
 | 
 | ||||||
| service = new turndown(); | service = new turndown(); | ||||||
| 
 | 
 | ||||||
|  | @ -37,7 +38,8 @@ function read_url(url, res) { | ||||||
| 	JSDOM.fromURL(url).then((document)=>{ | 	JSDOM.fromURL(url).then((document)=>{ | ||||||
| 		let reader = new Readability(document.window.document); | 		let reader = new Readability(document.window.document); | ||||||
| 		let article = reader.parse(); | 		let article = reader.parse(); | ||||||
| 		let result = service.turndown(article.content); | 		let markdown = service.turndown(article.content); | ||||||
|  | 		let result = common_filters.filter(url, markdown); | ||||||
| 		res.send(result); | 		res.send(result); | ||||||
| 	}); | 	}); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -0,0 +1,26 @@ | ||||||
|  | var urlparser = require('url'); | ||||||
|  | 
 | ||||||
|  | module.exports = { | ||||||
|  | 
 | ||||||
|  | 	list: [ | ||||||
|  | 		{ | ||||||
|  | 			domain: /.*\.wikipedia\.org/, | ||||||
|  | 			remove: [ | ||||||
|  | 				/\\\[\[edit\]\([^\s]+\s+"[^"]*"\)\\\]/i | ||||||
|  | 			] | ||||||
|  | 		} | ||||||
|  | 	],  | ||||||
|  | 
 | ||||||
|  |   filter: function (url, data) { | ||||||
|  | 	  let domain = urlparser.parse(url).hostname | ||||||
|  | 	  for (let i=0;i<this.list.length;i++) { | ||||||
|  | 	  	if (domain.match(this.list[i].domain)) { | ||||||
|  | 	  		for (let j=0;j<this.list[i].remove.length; j++) { | ||||||
|  | 	  			data = data.replace(this.list[i].remove[j],""); | ||||||
|  | 	  		} | ||||||
|  | 	  	} | ||||||
|  | 	  } | ||||||
|  | 	  return data; | ||||||
|  |   } | ||||||
|  | 
 | ||||||
|  | } | ||||||
		Loading…
	
		Reference in New Issue