-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathmeizu_xiaomi.js
176 lines (163 loc) · 5.12 KB
/
meizu_xiaomi.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
/**
* Created by liuxing on 14-9-17.
*/
// Third-party dependencies, API endpoint configuration and module-level state.
var request = require('request'),
weiUrl = require('./core/config/api_weibo'),
when = require('when'),
_ = require('lodash'),
async = require('async'),
// NOTE(review): the API credential is hard-coded in source — consider loading it from configuration or the environment.
access_token = '2.00IfZPzC1am4YB9f6a6ada02bxDNeC',
// Passed to queryid() as the `mid` of the post whose comments are analysed.
weibiId = 'Blf0XBux4',
requestCount = 200, // the comments endpoint returns at most 200 items per request
sources = ['MEIZU MX','小米','微博','其他'], // every comment-source label used for bucketing
// Comments kept by filterData(), accumulated across all fetched pages.
weibos = [],
// Set by getCommentsInfo() from the first page's `total_number`.
commentsCount;
// Keyword filter settings read by filterData().
// Declared explicitly with `var`: as bare assignments these names were implicit
// globals, which throws a ReferenceError under strict mode / ES modules.
var filterWord = '1799', // a comment must contain this text to be kept while filtering is on
openFiter = true; // true enables keyword filtering
// Per-source buckets that analysis() appends comments to; one key per label in `sources`.
var result = {
    'MEIZU MX': [],
    '小米': [],
    '微博': [],
    '其他': []
};
/**
 * Requests one page of comments for a post.
 *
 * The request is sent to `weiUrl.comments.show` with `requestCount` items per
 * page. The returned promise is rejected when the HTTP request fails or when
 * the response body is not valid JSON, instead of throwing inside the request
 * callback and leaving the promise pending forever.
 *
 * @param weiboId id of the post
 * @param page page number (callers in this file start at 1)
 * @returns {Promise} resolves with the parsed JSON response body
 */
function comments(weiboId, page) {
    var defer = when.defer();
    var data = {
        url: weiUrl.comments.show,
        qs: {
            id: weiboId,
            access_token: access_token,
            count: requestCount,
            page: page
        }
    };
    request(data, function (error, response, body) {
        if (error) { // transport-level failure: there is no body to parse
            defer.reject(error);
            return;
        }
        var parsed;
        try {
            parsed = JSON.parse(body);
        } catch (parseError) { // e.g. an HTML error page instead of JSON
            defer.reject(parseError);
            return;
        }
        defer.resolve(parsed);
    });
    return defer.promise;
}
/**
 * Looks up the numeric ID that belongs to a weibo (comment, private message) MID.
 *
 * The request is sent to `weiUrl.statuses.queryid` with `type: 1` and
 * `isBase62: 1`. The returned promise is rejected when the HTTP request fails
 * or when the response body is not valid JSON, instead of throwing inside the
 * request callback and leaving the promise pending forever.
 *
 * @param mid original weibo id as it appears in the post URL
 * @returns {Promise} resolves with the parsed JSON response body
 */
function queryid(mid) {
    var defer = when.defer();
    var data = {
        url: weiUrl.statuses.queryid,
        qs: {
            mid: mid,
            type: 1,
            access_token: access_token,
            isBase62: 1
        }
    };
    request(data, function (error, response, body) {
        if (error) { // transport-level failure: there is no body to parse
            defer.reject(error);
            return;
        }
        var parsed;
        try {
            parsed = JSON.parse(body);
        } catch (parseError) { // e.g. an HTML error page instead of JSON
            defer.reject(parseError);
            return;
        }
        defer.resolve(parsed);
    });
    return defer.promise;
}
/**
 * Collects every comment of a post and triggers the analysis.
 *
 * Fetches page 1, filters it into `weibos` and stores the reported total in
 * `commentsCount`. When that page is the only one, `analysis` runs right away;
 * otherwise pages 2..N are pushed onto the `work` queue.
 *
 * @param weiboId id of the post whose comments are collected
 */
function getCommentsInfo(weiboId) {
    comments(weiboId, 1).then(function (firstPage) {
        filterData(firstPage);
        commentsCount = firstPage.total_number;
        // Math.ceil instead of parseInt(...)+1: an exact multiple of the page
        // size no longer schedules one extra, empty page.
        var pageNum = Math.ceil(firstPage.total_number / requestCount);
        // Finish immediately when there is no next page. The page-count check
        // also covers a non-zero cursor with a single page (or a missing
        // total): nothing would be queued, so the analysis would never run.
        if (Number(firstPage.next_cursor) === 0 || !(pageNum > 1)) {
            console.log('评论总数 :' + commentsCount);
            analysis(weibos);
            return;
        }
        console.log('一共 %s 数据,每页 %s 条数据,一共 %s 页 ',commentsCount,requestCount,pageNum);
        for (var i = 2; i <= pageNum; i++) { // page 1 has already been handled above
            work.push({ weiboId: weiboId, page: i }, function () {
            });
        }
    }).then(null, function (error) {
        // Report the failure instead of leaving the rejection unhandled.
        console.error('Failed to load comments for weibo ' + weiboId + ':', error);
    });
}
/**
 * Converts one page of the comments response into compact records and appends
 * the ones that pass the keyword filter to the module-level `weibos` list.
 *
 * Each record holds the commenter's name (`userId`), the comment `text` and the
 * `source` with its HTML tags stripped. While `openFiter` is on, only comments
 * whose text contains `filterWord` are kept; otherwise every comment is kept.
 * Items lacking `source`, `text` or `user` no longer abort the whole page.
 *
 * @param result parsed response; its `comments` array is read
 */
function filterData(result){
    _.forEach(result.comments, function (item) {
        var weibo = {
            userId : item.user ? item.user.name : undefined,
            text : item.text,
            // the API delivers the client name wrapped in markup; keep the text only
            source: String(item.source || '').replace(/<\/?[^>]+>/gi, "")
        };
        if (!openFiter) { // keyword filtering switched off: keep everything
            weibos.push(weibo);
            return;
        }
        if (typeof weibo.text === 'string' && weibo.text.indexOf(filterWord) > -1) {
            weibos.push(weibo);
        }
    });
}
/**
 * Buckets the collected comments by client source and prints, per bucket, its
 * size and its share of all comments.
 *
 * A comment goes into the bucket of the first label in `sources` that occurs in
 * its `source` text; the last label is the catch-all. Buckets live in the
 * module-level `result` object and are appended to, so repeated calls accumulate.
 *
 * @param weibos records produced by filterData
 */
function analysis(weibos) {
    console.log('抓取的所有评论数量 %s',weibos.length);
    var fallback = sources[sources.length - 1]; // catch-all bucket
    _.forEach(weibos, function (weibo) {
        var bucket = fallback;
        for (var i = 0; i < sources.length - 1; i++) {
            if (weibo.source.indexOf(sources[i]) > -1) {
                bucket = sources[i];
                break;
            }
        }
        result[bucket].push(weibo);
    });
    var clientLength = weibos.length;
    _.forEach(sources, function (source) {
        var count = result[source].length;
        // With no comments at all the share would be 0/0 and print as NaN.
        var percent = clientLength > 0 ? (count * 100 / clientLength).toFixed(2) : '0.00';
        console.log(count+' 占比 %s % '+' '+source ,percent);
    });
    console.log('####################');
    console.log('包含1799的评论');
}
// Work queue that fetches the remaining comment pages, one task at a time.
// Each task is { weiboId, page }.
var work = async.queue(function (task, callback) {
    comments(task.weiboId, task.page).then(function (page) {
        filterData(page); // filter the page and append the kept comments to `weibos`
        console.log('task %s 完成,当前的评论数量为 %s',task.page,weibos.length);
    }).then(function () {
        // The first argument of an async callback is the error slot, so
        // success is signalled with no arguments rather than a status string.
        callback();
    }, function (error) {
        // Report the failure and hand it to the queue so it moves on to the
        // next page instead of stalling on a task that never completes.
        console.error('task ' + task.page + ' failed:', error);
        callback(error);
    });
}, 1);
// Handler assigned to the queue's `drain` hook: prints the total reported by
// the API, analyses everything gathered in `weibos`, then logs completion.
work.drain = function () {
    var total = commentsCount;
    console.log('评论总数 :' + total);

    analysis(weibos);

    console.log('all items have been processed');
};
// Entry point: translate the configured MID into the numeric weibo id, then
// collect and analyse that post's comments.
queryid(weibiId).then(function (data) {
    if (!data || !data.id) {
        // Without an id there is nothing to fetch; show what came back instead.
        console.error('Could not resolve weibo id for mid ' + weibiId + ':', data);
        return;
    }
    getCommentsInfo(data.id);
}).then(null, function (error) {
    // Report the failure instead of leaving the rejection unhandled.
    console.error('Failed to resolve weibo id for mid ' + weibiId + ':', error);
});