-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathknow.js
91 lines (81 loc) · 2.14 KB
/
know.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
// Modules used by this script. Each binding gets its own `const` statement:
// the previous comma-chained declaration was missing a comma, which ended the
// `const` early and turned `cheerio` and `request` into implicit globals.
const fs = require("fs");
const xlsx = require("xlsx");
const cheerio = require('cheerio');
const request = require('superagent');
/**
 * Spreadsheet-driven checker for Q&A pages.
 *
 * Uses the `xlsx`, `cheerio` and `request` (superagent) bindings that are in
 * scope for this file.
 */
class Handle {
  /**
   * Reads the first sheet of `file.xlsx` and, for every row from row 2 through
   * the last row named in that sheet's `!ref`, looks at the URL in column E.
   * When the URL's host is `know.baidu.com` the page is fetched and column K
   * of the same row is set to '是' if the text of `.answer-content .con`
   * contains "健视佳", or '否' otherwise.
   *
   * The annotated sheet is written to `output.xlsx` (as sheet `mySheet`) after
   * every successful response, so the file always holds the results received
   * so far. Requests are started without waiting for earlier ones to finish.
   *
   * Rows whose column-E cell is missing, is not a string, or does not contain
   * a host name are skipped. Failed requests are logged and leave column K
   * untouched.
   *
   * @returns {void}
   */
  excle() {
    const KNOW_HOST = 'know.baidu.com';
    // Deliberately no `g` flag: a global regex keeps `lastIndex` between
    // `.test()` calls, which made a hit on one page hide a hit on the next.
    const keyword = /健视佳/;
    const hostPattern = /[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.[a-zA-Z0-9][-a-zA-Z0-9]{0,62})+\.?/;
    const lastRowPattern = /[0-9]+$/;

    const workbook = xlsx.readFile('file.xlsx');
    const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
    const rows = xlsx.utils.sheet_to_json(firstSheet, { header: 1 });
    const ws = xlsx.utils.aoa_to_sheet(rows);
    const ref = firstSheet['!ref'];
    console.log(ref);

    // The trailing digits of the range (e.g. "A1:K120" -> 120) give the last row.
    const lastRowMatch = typeof ref === 'string' ? ref.match(lastRowPattern) : null;
    if (lastRowMatch === null) {
      // No range to walk (for example `!ref` is absent), so nothing to check.
      return;
    }
    const lastRow = Number.parseInt(lastRowMatch[0], 10);

    // Saves the current state of `ws` under the range of the source sheet.
    // NOTE(review): if the source range does not reach column K, the writer may
    // drop the K cells; confirm the input sheet already spans that column.
    const saveOutput = () => {
      const wb = {
        SheetNames: ['mySheet'],
        Sheets: {
          mySheet: Object.assign({}, ws, { '!ref': ref }),
        },
      };
      xlsx.writeFile(wb, 'output.xlsx');
    };

    // `<=` so that the final row of the sheet is checked as well.
    for (let row = 2; row <= lastRow; row++) {
      const cell = ws['E' + row];
      if (!cell || typeof cell.v !== 'string') {
        continue;
      }
      const url = cell.v;
      const hostMatch = url.match(hostPattern);
      if (hostMatch === null || hostMatch[0] !== KNOW_HOST) {
        continue;
      }

      request.get(url).end((err, res) => {
        if (err) {
          console.error(`Request failed for row ${row} (${url}):`, err);
          return;
        }
        const $ = cheerio.load(res.text, { decodeEntities: true });
        const answerText = $('.answer-content').find('.con').text();
        ws['K' + row] = { v: keyword.test(answerText) ? '是' : '否', t: 's' };
        saveOutput();
      });
    }
  }

  /**
   * Placeholder; currently does nothing.
   *
   * @returns {void}
   */
  read() {}
}
// Script entry point: create the handler, then run its two steps in order.
const handler = new Handle();
handler.excle();
handler.read();