Skip to content

Commit

Permalink
Update mysql_to_elastic.py
Browse files (browse the repository at this point in the history)
- Added a printout of the total number of articles already in ES.
  • Loading branch information
tfnribeiro committed Nov 21, 2024
1 parent d867896 commit 9e9e457
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions tools/mysql_to_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def gen_docs(articles_w_topics):
)
print("Got articles without topics, total: ", len(target_ids))

total_articles_in_es = 0
if len(target_ids) == 0:
print("No articles found! Exiting...")
return
Expand All @@ -114,11 +115,13 @@ def gen_docs(articles_w_topics):
ids_in_es = set(
[int(hit["_id"]) for hit in scan(es, index=ES_ZINDEX, query=es_query)]
)
total_articles_in_es = len(ids_in_es)
target_ids_not_in_es = list(filter(lambda x: x not in ids_in_es, target_ids))
else:
# The index was deleted / doesn't exist:
target_ids_not_in_es = target_ids

print(f"""Total articles in ES: {total_articles_in_es}""")
print(f"""Total articles missing: {len(target_ids_not_in_es)}""")
print(f"""Indexing a total of: {TOTAL_ITEMS}, in batches of: {ITERATION_STEP}""")

Expand Down

0 comments on commit 9e9e457

Please sign in to comment.