-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathwx.js
More file actions
61 lines (56 loc) · 1.61 KB
/
wx.js
File metadata and controls
61 lines (56 loc) · 1.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
const puppeteer = require('puppeteer')
const fs = require('fs')
const execPython = require('./cmd.js').execPython
/**
 * Pause helper: returns a promise that settles after a delay.
 *
 * @param {number} time - Delay in milliseconds, handed straight to setTimeout.
 * @returns {Promise<number>} Resolves with 1 once the timer fires; it never rejects.
 */
const timeout = (time) => {
  // resolve() cannot throw, so no try/catch or reject path is required here.
  return new Promise((resolve) => {
    setTimeout(() => resolve(1), time)
  })
}
/**
 * Remove every space, carriage return and line feed from a string.
 * Other whitespace (for example tabs) and all other characters are kept.
 *
 * @param {string} txt - Text to compact.
 * @returns {string} `txt` with all ' ', '\r' and '\n' characters removed.
 */
const iGetInnerText = (txt) => {
  // A single character class covers the three characters to strip, so the
  // string is scanned once. An empty class such as /[]/ never matches
  // anything and therefore has no place in this chain.
  return txt.replace(/[ \r\n]+/g, '')
}
// Command-line arguments that follow the node binary and the script path.
// slice() copies them; splice() would delete them from process.argv itself.
const args = process.argv.slice(2)
// The first argument is the page to scrape; a sample article is used when it
// is missing or empty.
const url = args[0] || 'http://www.jianshu.com/p/ec78f6489153'
// Scrape the page at `url`, print its title / summary / body text, save the
// compacted body text under ./raw/ and then pass the file paths to the Python
// divider script.
puppeteer.launch().then(async browser => {
  let article
  try {
    const page = await browser.newPage()
    await page.goto(url)
    // Fixed 2 s pause after navigation before the page text is read.
    await timeout(2000)
    // Article text: the rendered text of the page's <body>.
    article = await page.evaluate(() => {
      const body = document.getElementsByTagName('body')[0]
      return body.innerText
    })
  } finally {
    // Close the browser on every path, including failed navigation or
    // evaluation, so the browser is not left open after an error.
    await browser.close()
  }

  // The first line is the title. If the text has no newline at all, the
  // whole text is the title (indexOf returns -1, which must not reach slice).
  const titleEnd = article.indexOf('\n')
  const hasTitleBreak = titleEnd !== -1
  const title = hasTitleBreak ? article.slice(0, titleEnd) : article
  console.log('### 标题 ###')
  console.log(title)

  // The second line is the summary. It starts at the newline that ends the
  // title, so that newline is part of the printed value.
  const infoEnd = hasTitleBreak ? article.indexOf('\n', titleEnd + 1) : -1
  const hasInfoBreak = infoEnd !== -1
  let info = ''
  if (hasInfoBreak) {
    info = article.slice(titleEnd, infoEnd)
  } else if (hasTitleBreak) {
    // Only one newline: everything after the title is the summary.
    info = article.slice(titleEnd)
  }
  console.log('### 简介 ###')
  console.log(info)

  // Everything after the summary is the body, with spaces and line breaks
  // removed. There is no body when the text has fewer than two newlines.
  const content = hasInfoBreak ? iGetInnerText(article.slice(infoEnd)) : ''
  console.log('### 内容 ###')
  console.log(content)

  // Write to file
  const pythonScript = ['./python/divider2.py', './python/divider.py']
  const fileDir = ['./raw/', './out/', './tmp/', './sort/']
  const filename = '18.txt'
  // Space-separated list of the same file name under each working directory.
  const fileArg = fileDir.map((d) => d + filename).join(' ')
  console.log(fileArg)
  fs.writeFile('./raw/' + filename, content, (err) => {
    if (err) {
      // Thrown from the callback on purpose: a failed write aborts the run.
      throw err
    }
    execPython(pythonScript[1], fileArg)
    console.log('save success')
  })
}).catch((err) => {
  // Report launch / navigation / evaluation failures instead of leaving an
  // unhandled promise rejection, and signal failure through the exit code.
  console.error(err)
  process.exitCode = 1
})