diff --git a/Spider.js b/Spider.js index 70c2d5d..4f0fe08 100644 --- a/Spider.js +++ b/Spider.js @@ -1,5 +1,6 @@ var request = require('request'); var Promise = require('bluebird'); +var config = require('./config'); var Spider = function() { return { @@ -20,8 +21,7 @@ var Spider = function() { _xsrf: "44f011b01f29816fc257fae1770a9ece" }, headers: { - 'cookie': '_za=c392e6c0-5bdb-4b01-a06d-84ffb9836a61; _ga=GA1.2.378714859.1433690880; _xsrf=44f011b01f29816fc257fae1770a9ece; q_c1=fb660ee5c15b4c97ae2b4b075373e5b6|1457189679000|1433392648000; udid="AIAAQIMSlAmPTohRXib_bmZtsg_JbPW-tC8=|1457502059"; cap_id="MGE2NDZmOTY3MDY1NDdlZmJiNDk4NjBmOGY2ZjhiMTY=|1457847388|27a0720e2f3c9580f52f982b99a7a34d4d902bee"; z_c0="QUFBQTFuTWRBQUFYQUFBQVlRSlZUWHFKREZjQXItUVVZRkRhV0k0TEpUV182SUp5TlA1UUtBPT0=|1457847418|d5d52a10b95375f90c3c34c1393314cf323aca84"; n_c=1; __utmt=1; __utma=51854390.378714859.1433690880.1457863920.1457863920.1; __utmb=51854390.12.9.1457864042502; __utmc=51854390; __utmz=51854390.1457863920.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100-1|2=registration_date=20130824=1^3=entry_date=20130824=1', - //'content-length': '171', + 'cookie': config.cookie, 'content-type': 'application/x-www-form-urlencoded; charset=UTF-8', 'cache-control': 'no-cache', 'x-requested-with': 'XMLHttpRequest' @@ -48,10 +48,13 @@ function parseCard(text) { var result = {}; var re1 = /data-id=\"(\S*)\"/g; var re2 = /

.*>(.*)<\/a><\/h2>/g + var re3 = /href=\"(https:\/\/www\.zhihu\.com\/people\/\S*)\"/g; re1.exec(text); result.hash_id = RegExp.$1; re2.exec(text); result.name = RegExp.$1; + re3.exec(text); + result.url = RegExp.$1; return result; } @@ -60,4 +63,4 @@ function consoleLog(x) { return x; } -module.exports = Spider; \ No newline at end of file +module.exports = Spider; diff --git a/config.js b/config.js new file mode 100644 index 0000000..46b9f14 --- /dev/null +++ b/config.js @@ -0,0 +1,3 @@ +module.exports = { + cookie:'_za=c392e6c0-5bdb-4b01-a06d-84ffb9836a61; _ga=GA1.2.378714859.1433690880; _xsrf=44f011b01f29816fc257fae1770a9ece; q_c1=fb660ee5c15b4c97ae2b4b075373e5b6|1457189679000|1433392648000; udid="AIAAQIMSlAmPTohRXib_bmZtsg_JbPW-tC8=|1457502059"; cap_id="MGE2NDZmOTY3MDY1NDdlZmJiNDk4NjBmOGY2ZjhiMTY=|1457847388|27a0720e2f3c9580f52f982b99a7a34d4d902bee"; z_c0="QUFBQTFuTWRBQUFYQUFBQVlRSlZUWHFKREZjQXItUVVZRkRhV0k0TEpUV182SUp5TlA1UUtBPT0=|1457847418|d5d52a10b95375f90c3c34c1393314cf323aca84"; n_c=1; __utmt=1; __utma=51854390.378714859.1433690880.1457863920.1457863920.1; __utmb=51854390.12.9.1457864042502; __utmc=51854390; __utmz=51854390.1457863920.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100-1|2=registration_date=20130824=1^3=entry_date=20130824=1', +} \ No newline at end of file diff --git a/getUserHashID.js b/getUserHashID.js index deadec9..29d068e 100644 --- a/getUserHashID.js +++ b/getUserHashID.js @@ -1,5 +1,6 @@ var request = require('request'); var Promise = require('bluebird'); +var config = require('./config'); function getUserHashID(userPageUrl) { return new Promise(function(resolve, reject) { @@ -7,7 +8,7 @@ function getUserHashID(userPageUrl) { method: 'GET', url: userPageUrl, headers: { - 'cookie': '_za=c392e6c0-5bdb-4b01-a06d-84ffb9836a61; _ga=GA1.2.378714859.1433690880; _xsrf=44f011b01f29816fc257fae1770a9ece; q_c1=fb660ee5c15b4c97ae2b4b075373e5b6|1457189679000|1433392648000; udid="AIAAQIMSlAmPTohRXib_bmZtsg_JbPW-tC8=|1457502059"; cap_id="MGE2NDZmOTY3MDY1NDdlZmJiNDk4NjBmOGY2ZjhiMTY=|1457847388|27a0720e2f3c9580f52f982b99a7a34d4d902bee"; z_c0="QUFBQTFuTWRBQUFYQUFBQVlRSlZUWHFKREZjQXItUVVZRkRhV0k0TEpUV182SUp5TlA1UUtBPT0=|1457847418|d5d52a10b95375f90c3c34c1393314cf323aca84"; n_c=1; __utmt=1; __utma=51854390.378714859.1433690880.1457863920.1457863920.1; __utmb=51854390.12.9.1457864042502; __utmc=51854390; __utmz=51854390.1457863920.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); __utmv=51854390.100-1|2=registration_date=20130824=1^3=entry_date=20130824=1', + 'cookie': config.cookie } }, function(err, res, body) { if (err) { @@ -25,4 +26,4 @@ function parseHashID(html) { return RegExp.$1; } -module.exports = getUserHashID; \ No newline at end of file +module.exports = getUserHashID; diff --git a/index.js b/index.js index af608f3..4ef74d2 100644 --- a/index.js +++ b/index.js @@ -2,26 +2,49 @@ var Spider = require('./Spider'); var getUserHashID = require('./getUserHashID'); var Promise = require('bluebird'); getUserHashID('https://www.zhihu.com/people/avit4799') - .then(function(hashID) { - var works = [Spider().request({ - followees: true, - hash_id: hashID - }), Spider().request({ - hash_id: hashID - })]; - return Promise.all(works); + .then(function(hashID){ + return getFriends(hashID); + }).then(function(myFriends){ + //console.log(myFriends); + + return searchSameFriend('fc3d841ce5b084b7550c0cc85364c448', myFriends) + }).then(function(result){ + console.log(result); }) - .then(function(result) { + +function getFriends(hashID) { + var works = [Spider().request({ + followees: true, + hash_id: hashID + }), Spider().request({ + hash_id: hashID + })]; + return Promise.all(works).then(function(result) { console.log("fetch success!!!"); var followees = result[0].data; var followers = result[1].data; var friends = []; - followers.forEach(function(follower, index) { - followees.forEach(function(followee, index) { + followers.forEach(function(follower) { + followees.forEach(function(followee) { if (follower.hash_id === followee.hash_id) { friends.push(follower); } }); }); - console.log(friends); + return friends; }); +} + +function searchSameFriend(targetHashID, myFriends){ + return getFriends(targetHashID).then(function(targetFriends){ + var sameFriends = []; + targetFriends.forEach(function(targetFriend){ + myFriends.forEach(function(myFriend){ + if(targetFriend.hash_id === myFriend.hash_id){ + sameFriends.push(targetFriend); + } + }) + }) + return sameFriends; + }) +}