Skip to content

Commit

Permalink
add ignore_fields to speed up export
Browse files Browse the repository at this point in the history
  • Loading branch information
danieleguido committed Nov 13, 2024
1 parent 2988888 commit 2b573c1
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions impresso/utils/tasks/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def helper_export_query_as_csv_progress(
query: str,
query_hash: str,
user_bitmap_key: str,
+ ignore_fields: list = [],
skip: int = 0,
limit: int = 100,
logger: logging.Logger = default_logger,
Expand All @@ -67,9 +68,11 @@ def helper_export_query_as_csv_progress(
- loops (int): The number of loops allowed.
- progress (float): The progress percentage.
"""
- contents = find_all(
- q=query, fl=settings.IMPRESSO_SOLR_FIELDS, skip=skip, logger=logger
- )
+ # remove fields to speed up the process
+ query_param_fl = [
+ field for field in settings.IMPRESSO_SOLR_FIELDS if field not in ignore_fields
+ ]
+ contents = find_all(q=query, fl=query_param_fl, skip=skip, logger=logger)
total = contents["response"]["numFound"]
qtime = contents["responseHeader"]["QTime"]
# generate extra from job stats
Expand Down Expand Up @@ -100,7 +103,7 @@ def helper_export_query_as_csv_progress(
fieldnames = [
field
for field in settings.IMPRESSO_SOLR_ARTICLE_PROPS
- if not field.startswith("_")
+ if not field.startswith("_") and field not in ignore_fields
]
# Sort fieldnames with 'uid' first, then the rest alphabetically
with open(
Expand Down

0 comments on commit 2b573c1

Please sign in to comment.