Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
Marvin Zhang committed Jun 21, 2019
1 parent 5f4e2b1 commit 89f3a87
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 4 deletions.
3 changes: 0 additions & 3 deletions crawlab/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@
from routes.stats import StatsApi
from routes.tasks import TaskApi

# apply the monkey patch
monkey.patch_all()

# flask app instance
app = Flask(__name__)
app.config.from_object('config')
Expand Down
42 changes: 41 additions & 1 deletion crawlab/routes/spiders.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,14 @@ def get(self, id=None, action=None):
if spider is None:
stats = get_file_suffix_stats(dir_path)
lang = get_lang_by_stats(stats)
spider = db_manager.save('spiders', {
spider_id = db_manager.save('spiders', {
'name': dir_name,
'src': dir_path,
'lang': lang,
'suffix_stats': stats,
'type': SpiderType.CUSTOMIZED
})
spider = db_manager.get('spiders', id=spider_id)

# existing spider
else:
Expand Down Expand Up @@ -214,11 +215,50 @@ def get(self, id=None, action=None):
items[i]['last_5_errors'] = get_last_n_run_errors_count(spider_id=spider['_id'], n=5)
items[i]['last_7d_tasks'] = get_last_n_day_tasks_count(spider_id=spider['_id'], n=5)

# sort spiders by _id descending
items = reversed(sorted(items, key=lambda x: x['_id']))

return {
'status': 'ok',
'items': jsonify(items)
}

def delete(self, id: str = None) -> (dict, tuple):
    """
    DELETE method of given id for deleting a spider.

    For a customized spider the source folder is removed from disk first;
    then the spider document and every task and schedule referencing it
    are removed from the database, and the after_update hook is executed.

    :param id: id of the spider to delete
    :return: a result dict on success, or a (dict, status_code) tuple when
             the spider cannot be found (404) or its folder cannot be
             removed (500)
    """
    # get spider from db
    spider = db_manager.get(col_name=self.col_name, id=id)

    # guard against an unknown id: without this the attribute access below
    # would fail with an AttributeError instead of a clean error response
    # NOTE(review): assumes db_manager.get returns None when nothing matches
    if spider is None:
        return {
            'status': 'ok',
            'error': 'spider not found'
        }, 404

    # delete spider folder
    if spider.get('type') == SpiderType.CUSTOMIZED:
        try:
            shutil.rmtree(os.path.abspath(os.path.join(PROJECT_SOURCE_FILE_FOLDER, spider['src'])))
        except FileNotFoundError:
            # the folder is already gone; keep going so that the database
            # records can still be cleaned up instead of being stuck forever
            pass
        except Exception as err:
            return {
                'status': 'ok',
                'error': str(err)
            }, 500

    # perform delete action
    db_manager.remove_one(col_name=self.col_name, id=id)

    # remove related tasks
    db_manager.remove(col_name='tasks', cond={'spider_id': spider['_id']})

    # remove related schedules
    db_manager.remove(col_name='schedules', cond={'spider_id': spider['_id']})

    # execute after_update hook
    self.after_update(id)

    return {
        'status': 'ok',
        'message': 'deleted successfully',
    }

def crawl(self, id: str) -> (dict, tuple):
"""
Submit an HTTP request to start a crawl task in the node of given spider_id.
Expand Down

0 comments on commit 89f3a87

Please sign in to comment.