Skip to content

Commit

Permalink
Update backfill scripts for classifications (#42)
Browse files Browse the repository at this point in the history
* update classifications backfill script to chunk and save in file

* update to <= remove unused limit in query

* update copy into source

* split backfill to file creation then copy from files

* cast to int

* revert accidental adding commas on limit

* add keepalives to hopefully ensure connection does not get lost
  • Loading branch information
yuenmichelle1 authored Oct 18, 2023
1 parent 9d82dcd commit e0f99e2
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion scripts/copy_classifications_from_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

output_file_no = 0
while output_file_no <= num_files:
with psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") as timescale_db_conn:
with psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require keepalives=1 keepalives_idle=30 keepalives_interval=10 keepalives_count=20") as timescale_db_conn:
with timescale_db_conn.cursor(name="timescale_cursor").copy("COPY classification_events FROM STDIN DELIMITER ',' CSV HEADER") as timescale_copy:
timescale_copy.write(open(f"prod_classifications_{output_file_no}.csv").read())
output_file_no += 1
Expand Down
2 changes: 1 addition & 1 deletion scripts/save_classifications_chunk_in_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
else:
query = f"{classifications_query} limit {limit} offset {limit * offset}"

with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require") as panoptes_db_conn:
with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require keepalives=1 keepalives_idle=30 keepalives_interval=10 keepalives_count=20") as panoptes_db_conn:
with open(f"prod_classifications_{offset}.csv", "wb") as f:
with panoptes_db_conn.cursor(name="panoptes_cursor").copy(f"COPY ({classifications_query}) TO STDOUT WITH CSV HEADER", (FIRST_INGESTED_CLASSIFICATION_ID,)) as panoptes_copy:
for data in panoptes_copy:
Expand Down

0 comments on commit e0f99e2

Please sign in to comment.