
Commit fb11a14

download results
Marvin Zhang committed May 30, 2019
1 parent 0370fe4 commit fb11a14
Showing 2 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion crawlab/routes/tasks.py
@@ -224,7 +224,7 @@ def download_results(self, id: str):
         if not col_name:
             return send_csv([], f'results_{col_name}_{round(time())}.csv')
         items = db_manager.list(col_name, {'task_id': id}, limit=999999999)
-        fields = get_spider_col_fields(col_name)
+        fields = get_spider_col_fields(col_name, task_id=id, limit=999999999)
         return send_csv(items,
                         filename=f'results_{col_name}_{round(time())}.csv',
                         fields=fields,
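
In crawlab/routes/tasks.py, the download route now passes task_id=id and a large limit to get_spider_col_fields, so the CSV header is computed from the results of the task being exported rather than from the helper's old default sample of the first 100 documents in the whole collection. A small self-contained illustration of the effect (not Crawlab code; the data and the col_fields helper are made up), showing that the header is the union of keys across the sampled documents:

# Illustration only: the CSV columns are the union of keys seen in the sampled documents,
# so restricting the sample to one task keeps other tasks' fields out of the header.
docs = [
    {'task_id': 't1', 'title': 'a', 'url': 'http://example.com/a'},
    {'task_id': 't2', 'title': 'b', 'price': 9.99},   # another task's result adds 'price'
]

def col_fields(items):
    fields = set()
    for item in items:
        for k in item.keys():
            fields.add(k)
    return sorted(fields)

print(col_fields(docs))                                        # before: ['price', 'task_id', 'title', 'url']
print(col_fields([d for d in docs if d['task_id'] == 't1']))   # after:  ['task_id', 'title', 'url']
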
9 changes: 7 additions & 2 deletions crawlab/utils/spider.py
@@ -41,12 +41,17 @@ def get_spider_type(path: str) -> SpiderType:
         return SpiderType.SCRAPY
 
 
-def get_spider_col_fields(col_name: str) -> list:
+def get_spider_col_fields(col_name: str, task_id: str = None, limit: int = 100) -> list:
     """
     Get spider collection fields
     :param col_name: collection name
+    :param task_id: task_id
+    :param limit: limit
     """
-    items = db_manager.list(col_name, {}, limit=100, sort_key='_id')
+    filter_ = {}
+    if task_id is not None:
+        filter_['task_id'] = task_id
+    items = db_manager.list(col_name, filter_, limit=limit, sort_key='_id')
     fields = set()
     for item in items:
         for k in item.keys():
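
In crawlab/utils/spider.py, the helper now accepts an optional task_id filter and a caller-controlled limit instead of always sampling an unfiltered 100 documents. The hunk is cut off after the inner loop; the sketch below shows how the complete helper likely reads after this commit, with the truncated tail and the more descriptive parameter docs being assumptions rather than part of the diff:

# Hedged sketch of the full helper after this commit. `db_manager` is the module's MongoDB
# access layer (imported elsewhere in crawlab/utils/spider.py); the last two lines are an
# assumed continuation of the loop shown in the diff.
def get_spider_col_fields(col_name: str, task_id: str = None, limit: int = 100) -> list:
    """
    Get spider collection fields
    :param col_name: collection name
    :param task_id: optional task id; when given, only that task's documents are sampled
    :param limit: maximum number of documents to sample
    """
    filter_ = {}
    if task_id is not None:
        filter_['task_id'] = task_id
    items = db_manager.list(col_name, filter_, limit=limit, sort_key='_id')
    fields = set()
    for item in items:
        for k in item.keys():
            fields.add(k)      # collect every key seen across the sampled documents
    return list(fields)        # assumed: return the union as a list for use as CSV columns

With the route change above, a caller that needs the exact header for an export passes task_id and a high limit, while other callers keep the cheaper 100-document default.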
