forked from starkwang/Zhihu-Spider
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0fad8cb
commit a83c644
Showing
9 changed files
with
295 additions
and
238 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
node_modules/ | ||
dist/ | ||
|
||
# IDE | ||
.idea | ||
|
||
# config | ||
config.js | ||
config.js |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
"use strict" | ||
|
||
// Build script: transpiles the ES2015 sources in src/ into dist/ with Babel.
// Uses the gulp 3 task API (dependency arrays), matching the rest of this file.
const gulp = require('gulp')
const babel = require('gulp-babel')
const sourcemaps = require('gulp-sourcemaps')
const plumber = require('gulp-plumber')

// Glob of files that are transpiled and watched.
const SOURCES = 'src/*.js'

// Transpile every source file and write it, plus an external source map, to dist/.
gulp.task('babel', () => {
  return gulp.src(SOURCES)
    // plumber keeps the pipeline alive when a plugin emits an error
    .pipe(plumber())
    .pipe(sourcemaps.init())
    .pipe(babel({
      presets: ['es2015']
    }))
    .pipe(sourcemaps.write('.'))
    .pipe(gulp.dest('dist'))
})

// Run one full build first so dist/ exists before the server requires it,
// then rebuild whenever a source file changes.
gulp.task('watch', ['babel'], () => {
  gulp.watch(SOURCES, ['babel'])
})

gulp.task('default', ['watch'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,25 @@ | ||
// Application entry point: serves the client and runs the spider on request.
// The HTTP server that socket.io is attached to listens on 3001; a second
// Express listener serves the same app on 8080.
const express = require('express')
const bodyParser = require('body-parser')
const http = require('http')
const Spider = require('./dist/Spider')

const app = express()
const server = http.createServer(app)
const io = require('socket.io')(server)

// The bare bodyParser() middleware is deprecated; register the two parsers
// it used to bundle explicitly.
app.use(bodyParser.json())
app.use(bodyParser.urlencoded({extended: true}))
app.use(express.static('./client'))

app.get('/', (req, res) => {
  res.sendFile(__dirname + '/client/index.html')
})

io.on('connection', socket => {
  // 'fetch start' carries {url}; each request starts one crawl whose
  // progress is streamed back over the same socket.
  socket.on('fetch start', data => {
    // A missing or malformed payload would otherwise throw inside the handler.
    if (!data || typeof data.url !== 'string') {
      socket.emit('notice', 'invalid request: missing url')
      return
    }

    Spider(data.url, socket)
  })
})

server.listen(3001)

app.listen(8080)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,96 +1,113 @@ | ||
var fetchFollwerOrFollwee = require('./fetchFollwerOrFollwee'); | ||
var getUser = require('./getUser'); | ||
var Promise = require('bluebird'); | ||
var config = require('../config'); | ||
module.exports = Spider; | ||
|
||
function Spider(userPageUrl, socket) { | ||
socket.emit('notice', '抓取用户信息......'); | ||
return getUser(userPageUrl) | ||
.then(function(user) { | ||
socket.emit('notice', '抓取用户信息成功'); | ||
socket.emit('get user', user); | ||
return getFriends(user, socket); | ||
}) | ||
.then(function(myFriends) { | ||
return Promise.map(myFriends, function(myFriend) { | ||
return getUser(myFriend.url); | ||
}, { concurrency: config.concurrency ? config.concurrency : 3 }); | ||
}) | ||
.then(function(myFriends) { | ||
var input = []; | ||
myFriends.forEach(function(friend) { | ||
input.push({ | ||
"user": friend, | ||
"sameFriends": [] | ||
}) | ||
}); | ||
socket.emit('data', input); | ||
|
||
console.log(myFriends); | ||
return Promise.map(myFriends, function(myFriend) { | ||
return searchSameFriend(myFriend, myFriends, socket); | ||
}, { concurrency: config.concurrency ? config.concurrency : 3 }); | ||
}) | ||
.then(function(result) { | ||
var data = result; | ||
socket.emit('data', data); | ||
|
||
}) | ||
.catch(function(err) { | ||
console.log(err); | ||
}) | ||
"use strict" | ||
|
||
import Promise from 'bluebird' | ||
import tracer from 'tracer' | ||
import fetchFollwerOrFollwee from './fetchFollwerOrFollwee' | ||
import getUser from './getUser' | ||
import config from '../config' | ||
|
||
const logger = tracer.colorConsole() | ||
|
||
/**
 * Crawl the friends of the user at `userPageUrl` and stream the results
 * to the client through `socket`.
 *
 * Events emitted: 'notice' (progress text), 'get user' (the root user),
 * 'data' (first a placeholder list with empty `sameFriends`, then the
 * final list produced by searchSameFriend).
 *
 * @param {string} userPageUrl - profile page URL passed to getUser
 * @param {object} socket - object exposing `emit(event, payload)`
 * @returns {Promise} settles after the final 'data' event; any failure in
 *   the chain is logged with logger.error and not rethrown
 */
const Spider = (userPageUrl, socket) => {
  const concurrency = config.concurrency ? config.concurrency : 3

  // Step 1: announce the root user, then look up that user's friends.
  const announceUser = user => {
    socket.emit('notice', '抓取用户信息成功')
    socket.emit('get user', user)

    return getFriends(user, socket)
  }

  // Step 2: fetch the full profile of every friend, `concurrency` at a time.
  const loadProfiles = friends =>
    Promise.map(friends, friend => getUser(friend.url), {concurrency})

  // Step 3: send placeholders, then compute the shared friends per profile.
  const compareFriends = profiles => {
    const placeholders = profiles.map(profile => ({
      user: profile,
      sameFriends: [],
    }))

    socket.emit('data', placeholders)

    // debug
    logger.log(profiles)

    return Promise.map(
      profiles,
      profile => searchSameFriend(profile, profiles, socket),
      {concurrency}
    )
  }

  // Step 4: deliver the final result set.
  const publish = results => {
    socket.emit('data', results)
  }

  const report = err => {
    // debug
    logger.error(err)
  }

  socket.emit('notice', '抓取用户信息......')

  return getUser(userPageUrl)
    .then(announceUser)
    .then(loadProfiles)
    .then(compareFriends)
    .then(publish)
    .catch(report)
}
/**
 * Resolve the "friends" of a user: the followers whose `hash_id` also
 * appears in the user's followee list.
 *
 * Both lists are requested through fetchFollwerOrFollwee in parallel.
 * Each matching follower appears once, in follower-list order, even if
 * the followee list repeats an id.
 *
 * @param {object} user - user record forwarded to fetchFollwerOrFollwee
 * @param {object} socket - forwarded unchanged to fetchFollwerOrFollwee
 * @returns {Promise<Array>} followers that are also followees
 */
const getFriends = (user, socket) => {
  const works = [
    fetchFollwerOrFollwee({isFollowees: true, user}, socket),
    fetchFollwerOrFollwee({user}, socket),
  ]

  return Promise.all(works)
    .then(([followees, followers]) => {
      // Index followee ids once so each follower is a single lookup
      // rather than a scan of the whole followee list.
      const followeeIds = new Set(followees.map(followee => followee.hash_id))

      return followers.filter(follower => followeeIds.has(follower.hash_id))
    })
}
/**
 * Find which of `aFriend`'s friends are also in `myFriends` (matched by
 * `hash_id`), report them to the client, and return them.
 *
 * Emits 'notice' before the lookup and 'same friend' with
 * `{hash_id, sameFriends}` once the overlap is known.
 *
 * @param {object} aFriend - friend whose own friend list is fetched
 * @param {Array} myFriends - the root user's friends to compare against
 * @param {object} socket - object exposing `emit(event, payload)`
 * @returns {Promise<{user: object, sameFriends: Array}>} the shared friends,
 *   in the order they appear in `aFriend`'s friend list
 */
const searchSameFriend = (aFriend, myFriends, socket) => {
  socket.emit("notice", "searchSameFriend with " + aFriend.name + "......")

  // debug
  logger.log("searchSameFriend with " + aFriend.name + "......")

  return getFriends(aFriend, socket)
    .then(targetFriends => {
      // debug
      logger.log('counting for ' + aFriend.name + '......')
      logger.log("\n\n==============\n Same Friends with " + aFriend.name + "\n")

      // Index my friends' ids once so each target friend is a single
      // lookup rather than a scan of the whole list.
      const myFriendIds = new Set(myFriends.map(myFriend => myFriend.hash_id))
      const sameFriends = targetFriends.filter(
        targetFriend => myFriendIds.has(targetFriend.hash_id)
      )

      socket.emit('same friend', {
        hash_id: aFriend.hash_id,
        sameFriends: sameFriends
      })

      // debug
      logger.log(sameFriends)
      logger.log("\n\n")

      return {
        user: aFriend,
        sameFriends,
      }
    })
}
|
||
// Removed-side (pre-refactor) version of searchSameFriend as shown in this diff. | ||
// Fetches aFriend's friends via getFriends, collects those whose hash_id also | ||
// appears in myFriends, emits them on the socket and resolves with | ||
// {user, sameFriends}. | ||
function searchSameFriend(aFriend, myFriends, socket) { | ||
socket.emit("notice", "searchSameFriend with " + aFriend.name + "......"); | ||
console.log("searchSameFriend with " + aFriend.name + "......"); | ||
return getFriends(aFriend, socket) | ||
.then(function(targetFriends) { | ||
var sameFriends = []; | ||
console.log('counting for ' + aFriend.name + '......') | ||
// Pairwise comparison: one push per (targetFriend, myFriend) pair with equal hash_id. | ||
targetFriends.forEach(function(targetFriend) { | ||
myFriends.forEach(function(myFriend) { | ||
if (targetFriend.hash_id === myFriend.hash_id) { | ||
sameFriends.push(targetFriend); | ||
} | ||
}) | ||
}) | ||
console.log("\n\n==============\n Same Friends with " + aFriend.name + "\n"); | ||
// Tell the client which friends are shared with aFriend. | ||
socket.emit('same friend', { | ||
hash_id: aFriend.hash_id, | ||
sameFriends: sameFriends | ||
}) | ||
console.log(sameFriends); | ||
console.log("\n\n"); | ||
|
||
return { | ||
user: aFriend, | ||
sameFriends: sameFriends | ||
}; | ||
}) | ||
} | ||
module.exports = Spider |
Oops, something went wrong.