Skip to content

Commit

Permalink
Add sslmode require to backfill scripts (#32)
Browse files Browse the repository at this point in the history
* Create backfill_talk_comments.py

* backfill classifications

* Update backfill_classifications.py

* add user_group_membership_backfill and update select query of backfilling talk comments and classifications

* add update classification events

* add sslmode require to connection strings
  • Loading branch information
yuenmichelle1 authored Sep 21, 2023
1 parent 9cedd9e commit 3dbd13c
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion scripts/backfill_classifications.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
current_time = now.strftime("%H:%M:%S")
print("CLASSIFICATIONS backfill BEFORE Time =", current_time)

with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW}") as panoptes_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}") as timescale_db_conn:
with psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require") as panoptes_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") as timescale_db_conn:
with panoptes_db_conn.cursor(name="panoptes_cursor").copy("COPY (select id as classification_id, created_at as event_time, updated_at as classification_updated_at, TO_TIMESTAMP(metadata ->> 'started_at', 'YYYY-MM-DD HH24:MI:SS') as started_at, TO_TIMESTAMP(metadata ->> 'finished_at', 'YYYY-MM-DD HH24:MI:SS') as finished_at, project_id, workflow_id, user_id, string_to_array(replace(replace(replace(metadata ->> 'user_group_ids', '[', ''), ']', ''), ' ', '' ), ',')::int[] as user_group_ids, EXTRACT(EPOCH FROM TO_TIMESTAMP(metadata ->> 'finished_at', 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP(metadata ->> 'started_at', 'YYYY-MM-DD HH24:MI:SS')) as session_time, created_at, updated_at from classifications where id < %s) TO STDOUT (FORMAT BINARY)", (FIRST_INGESTED_CLASSIFICATION_ID,)) as panoptes_copy:
with timescale_db_conn.cursor().copy("COPY classification_events FROM STDIN (FORMAT BINARY)") as timescale_copy:
for data in panoptes_copy:
Expand Down
2 changes: 1 addition & 1 deletion scripts/backfill_talk_comments.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
FIRST_INGESTED_COMMENT_ID = os.getenv('FIRST_COMMENT_ID')


with psycopg.connect(f"host={TALK_CONN} port={TALK_PORT} dbname={TALK_DB} user={TALK_USER} password={TALK_PW}") as talk_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}") as timescale_db_conn:
with psycopg.connect(f"host={TALK_CONN} port={TALK_PORT} dbname={TALK_DB} user={TALK_USER} password={TALK_PW} sslmode=require") as talk_db_conn, psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require") as timescale_db_conn:
with talk_db_conn.cursor(name="talk").copy("COPY (SELECT id as comment_id, created_at as event_time, updated_at as comment_updated_at, project_id, user_id, created_at, updated_at from comments where id < %s}) TO STDOUT (FORMAT BINARY)", (FIRST_INGESTED_COMMENT_ID,)) as talk_copy:
with timescale_db_conn.cursor().copy("COPY comment_events FROM STDIN (FORMAT BINARY)") as timescale_copy:
for data in talk_copy:
Expand Down
4 changes: 2 additions & 2 deletions scripts/user_group_membership_classification_backfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
# email formats in form of comma separated string with no spaces (eg. "%a.com,%b.org%")
email_formats = args.email_domain_formats

panoptes_db_conn = psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW}")
panoptes_db_conn = psycopg.connect(f"host={PANOPTES_CONN} port={PANOPTES_PORT} dbname={PANOPTES_DB} user={PANOPTES_USER} password={PANOPTES_PW} sslmode=require")
panoptes_cursor = panoptes_db_conn.cursor()

eras_conn = psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW}")
eras_conn = psycopg.connect(f"host={TIMESCALE_CONNECTION} port={TIMESCALE_PORT} dbname={ERAS_DB} user={ERAS_USER} password={ERAS_PW} sslmode=require")
eras_cursor = eras_conn.cursor()

# get ids of users that are not in group yet
Expand Down

0 comments on commit 3dbd13c

Please sign in to comment.