-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_spider.py
executable file
·51 lines (42 loc) · 1.42 KB
/
run_spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
import os
import scrapy.cmdline
from news.settings import *
import redis
def get_news_source(redis_client=None):
    """Return the news source names configured in Redis.

    The names are read from the ``news_source`` key, which holds a single
    ``;``-separated string. Surrounding whitespace is stripped and empty
    entries (an empty value, a trailing ``;``, ``a;;b``) are dropped so the
    caller never receives a blank source name.

    Args:
        redis_client: Optional client object exposing ``get(key)``. When
            omitted, a client is created from the ``REDIS_*`` settings.

    Returns:
        A list of source names; empty when the key is missing, holds no
        usable names, or Redis cannot be queried.
    """
    if redis_client is None:
        redis_client = redis.Redis(REDIS_HOST, REDIS_PORT, REDIS_DB, REDIS_PASS)

    # The connection is only opened when a command is issued, so this call
    # is where an unreachable server surfaces.
    try:
        raw = redis_client.get('news_source')
    except redis.RedisError as err:
        print('failed to read news_source from redis:', err)
        return []

    if raw is None:
        return []
    # The client yields bytes unless it was built with decode_responses=True.
    if isinstance(raw, bytes):
        raw = raw.decode()

    return [name.strip() for name in raw.split(';') if name.strip()]
def run_for_linux():
    """Poll the configured news sources forever, crawling each one in turn.

    Every cycle prints a timestamp, fetches the source list via
    ``get_news_source()`` (defaulting to '金色财经' when it is empty), runs
    the matching ``scrapy crawl`` shell command for each source, and then
    sleeps for 60 seconds. This function never returns.
    """
    # Source name -> shell command that crawls it.
    crawl_commands = {
        '金色财经': "scrapy crawl jinse",
        '币世界': "scrapy crawl bishijie",
        '币快报': "scrapy crawl bikuaibao",
    }

    while True:
        print('run spider at %s' % time.asctime())
        configured = get_news_source() or ['金色财经']

        for name in configured:
            print('crawling', name)
            command = crawl_commands.get(name)
            if command is None:
                print(name, 'is not supported!')
                # NOTE(review): unknown sources fall back to the bishijie
                # spider - confirm this fallback is intended.
                command = "scrapy crawl bishijie"
            os.system(command)

        time.sleep(60)
def run_for_windows():
    """Run the jinse, bishijie and bikuaibao spiders once, in this process.

    ``scrapy.cmdline.execute`` finishes by calling ``sys.exit``, so chaining
    several calls only ever runs the first spider, and the Twisted reactor
    cannot be restarted to work around that. All three spiders are therefore
    scheduled on a single ``CrawlerProcess``; they run concurrently and this
    function returns once every crawl has finished.
    """
    # Imported locally so the module-level imports stay untouched.
    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings

    process = CrawlerProcess(get_project_settings())
    for spider_name in ('jinse', 'bishijie', 'bikuaibao'):
        process.crawl(spider_name)
    # Blocks until all scheduled crawls are done.
    process.start()
# Only start the endless polling loop when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    run_for_linux()
    # On Windows, call run_for_windows() here instead.