-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetNews.js
More file actions
56 lines (47 loc) · 1.27 KB
/
getNews.js
File metadata and controls
56 lines (47 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
var cheerio = require('cheerio')
var request = require('request')
var fs = require('fs')
var iconv = require('iconv-lite')
var http = require('http')
/*
 * Read the first article URL from urls.txt, download that page over HTTP,
 * decode it from GBK, extract the title and body text, and write them to
 * article.txt as a one-element JSON array.
 */

/**
 * Extract the first URL from the raw text of urls.txt.
 *
 * NOTE(review): assumes each record looks like {"<key>":"<url>"} and that
 * records are separated by `"},` - confirm against whatever writes urls.txt.
 *
 * @param {string} data - Raw contents of urls.txt.
 * @returns {?string} The first URL, or null when none can be found.
 */
function extractFirstUrl(data) {
  if (typeof data !== 'string') {
    return null;
  }
  var firstRecord = data.split('"},')[0];
  var valueStart = firstRecord.indexOf(':"');
  if (valueStart === -1) {
    return null;
  }
  var value = firstRecord.slice(valueStart + 2);
  // When the file holds a single record there is no `"},` separator, so the
  // closing `"}]` is still attached; cut the value at its closing quote.
  var closingQuote = value.indexOf('"');
  if (closingQuote !== -1) {
    value = value.slice(0, closingQuote);
  }
  value = value.trim();
  return value === '' ? null : value;
}

/**
 * Build the article record from an already-decoded HTML document.
 *
 * @param {string} html - Decoded page markup.
 * @returns {{ArticleTitle: string, ArticleContent: string}} Text of the
 *   `#h1title` element and the concatenated text of every `p` inside
 *   `#endText`.
 */
function parseArticle(html) {
  var $ = cheerio.load(html);
  var paragraphs = [];
  $('p', '#endText').each(function (index, ele) {
    paragraphs.push($(ele).text());
  });
  return {
    ArticleTitle: $('#h1title').text(),
    ArticleContent: paragraphs.join('')
  };
}

fs.readFile('urls.txt', 'utf-8', function (err, data) {
  if (err) {
    console.log(err);
    return;
  }

  var url = extractFirstUrl(data);
  console.log(url);
  if (!url) {
    console.log('no url found in urls.txt');
    return;
  }

  var req;
  try {
    req = http.get(url, function (res) {
      // Anything but a 2xx answer (404, redirect, ...) carries no article;
      // drain the response and leave article.txt untouched.
      if (res.statusCode < 200 || res.statusCode >= 300) {
        console.log('request failed, status code: ' + res.statusCode);
        res.resume();
        return;
      }

      // Keep the raw bytes: the page is GBK encoded, so letting the stream
      // decode chunks as UTF-8 text would corrupt multi-byte characters.
      var chunks = [];
      res.on('data', function (chunk) {
        chunks.push(chunk);
      }).on('end', function () {
        // iconv-lite turns the GBK bytes into a regular JavaScript string.
        var html = iconv.decode(Buffer.concat(chunks), 'GBK');
        var result = [parseArticle(html)];
        var file = 'article.txt';
        fs.writeFile(file, JSON.stringify(result), function (err) {
          if (err) {
            console.log("write file failed" + err);
            return;
          }
        });
      }).on('error', function (err) {
        console.log(err);
      });
    });
  } catch (e) {
    // http.get throws synchronously on a malformed or non-http URL.
    console.log(e);
    return;
  }

  req.on('error', function (err) {
    console.log(err);
  });
});