add per domain regexp post filtering
							parent
							
								
									3e4cbbf077
								
							
						
					
					
						commit
						07fb74e0b2
					
				
							
								
								
									
										4
									
								
								index.js
								
								
								
								
							
							
						
						
									
										4
									
								
								index.js
								
								
								
								
							|  | @ -2,6 +2,7 @@ const https = require('https'); | |||
| const turndown = require('turndown'); | ||||
| const { Readability } = require('@mozilla/readability'); | ||||
| const JSDOM = require('jsdom').JSDOM; | ||||
| const common_filters = require('./url_to_markdown_common_filters'); | ||||
| 
 | ||||
| service = new turndown(); | ||||
| 
 | ||||
|  | @ -37,7 +38,8 @@ function read_url(url, res) { | |||
| 	JSDOM.fromURL(url).then((document)=>{ | ||||
| 		let reader = new Readability(document.window.document); | ||||
| 		let article = reader.parse(); | ||||
| 		let result = service.turndown(article.content); | ||||
| 		let markdown = service.turndown(article.content); | ||||
| 		let result = common_filters.filter(url, markdown); | ||||
| 		res.send(result); | ||||
| 	}); | ||||
| } | ||||
|  |  | |||
|  | @ -0,0 +1,26 @@ | |||
| var urlparser = require('url'); | ||||
| 
 | ||||
| module.exports = { | ||||
| 
 | ||||
| 	list: [ | ||||
| 		{ | ||||
| 			domain: /.*\.wikipedia\.org/, | ||||
| 			remove: [ | ||||
| 				/\\\[\[edit\]\([^\s]+\s+"[^"]*"\)\\\]/i | ||||
| 			] | ||||
| 		} | ||||
| 	],  | ||||
| 
 | ||||
|   filter: function (url, data) { | ||||
| 	  let domain = urlparser.parse(url).hostname | ||||
| 	  for (let i=0;i<this.list.length;i++) { | ||||
| 	  	if (domain.match(this.list[i].domain)) { | ||||
| 	  		for (let j=0;j<this.list[i].remove.length; j++) { | ||||
| 	  			data = data.replace(this.list[i].remove[j],""); | ||||
| 	  		} | ||||
| 	  	} | ||||
| 	  } | ||||
| 	  return data; | ||||
|   } | ||||
| 
 | ||||
| } | ||||
		Loading…
	
		Reference in New Issue