From 7367cfaacd4d7f52ae53961d066049f1e634a496 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Fri, 15 Mar 2024 17:45:29 -0500 Subject: [PATCH 01/17] update comments on backfill scripts to give context of what previous scripts were used for --- scripts/backfill_classifications.py | 7 +++++++ scripts/backfill_talk_comments.py | 5 +++++ scripts/copy_classifications_from_files.py | 7 +++++++ scripts/save_classifications_chunk_in_files.py | 7 +++++++ 4 files changed, 26 insertions(+) diff --git a/scripts/backfill_classifications.py b/scripts/backfill_classifications.py index a5b03fb..5b00f7d 100644 --- a/scripts/backfill_classifications.py +++ b/scripts/backfill_classifications.py @@ -1,3 +1,10 @@ +## +## This script was used in VM when first introducing ERAS. We needed to backfill classifications into ERAS db. +## Unfortunately there was too much data to do a straight copy from panoptes db to copy to eras db. +## The script was followed up with save_classifications_chunk_in_files.py and copy_classifications_from_files.py, which copies from panoptes +## DB to csvs and then csvs to Eras DB. See PR: https://github.com/zooniverse/eras/pull/40 +## + import os import psycopg from datetime import datetime diff --git a/scripts/backfill_talk_comments.py b/scripts/backfill_talk_comments.py index d5d04d8..14f452f 100644 --- a/scripts/backfill_talk_comments.py +++ b/scripts/backfill_talk_comments.py @@ -1,3 +1,8 @@ +## +## This script was used in VM when first introducing ERAS. We needed to backfill talk comments into ERAS db. +## This script is a straight COPY FROM Talk DB to COPY TO ERAS DB. 
+## + import os import psycopg from datetime import datetime diff --git a/scripts/copy_classifications_from_files.py b/scripts/copy_classifications_from_files.py index 7595b6d..4f9ac1a 100644 --- a/scripts/copy_classifications_from_files.py +++ b/scripts/copy_classifications_from_files.py @@ -1,3 +1,10 @@ +## +## This script along with save_classifications_chunk_in_files.py was used in VM when first introducing ERAS. +## We needed to backfill classifications into ERAS db. +## The script was preluded with backfll_classifications.py which does a straight copy from panoptes db to copy to eras db. +## See PR: https://github.com/zooniverse/eras/pull/40 +## + import os import psycopg from datetime import datetime diff --git a/scripts/save_classifications_chunk_in_files.py b/scripts/save_classifications_chunk_in_files.py index 7758aa6..5e286f7 100644 --- a/scripts/save_classifications_chunk_in_files.py +++ b/scripts/save_classifications_chunk_in_files.py @@ -1,3 +1,10 @@ +## +## This script along with copy_classifications_from_files.py was used in VM when first introducing ERAS. +## We needed to backfill classifications into ERAS db. +## The script was preluded with backfll_classifications.py which does a straight copy from panoptes db to copy to eras db. 
+## See PR: https://github.com/zooniverse/eras/pull/40 +## + import os import psycopg from datetime import datetime From ace522eb66822813625d4970c461c286d64ae366 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Tue, 26 Mar 2024 09:45:30 -0500 Subject: [PATCH 02/17] adding comments for context on old scripts --- scripts/copy_classifications_from_files.py | 1 + scripts/save_classifications_chunk_in_files.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/copy_classifications_from_files.py b/scripts/copy_classifications_from_files.py index 4f9ac1a..0f5a006 100644 --- a/scripts/copy_classifications_from_files.py +++ b/scripts/copy_classifications_from_files.py @@ -2,6 +2,7 @@ ## This script along with save_classifications_chunk_in_files.py was used in VM when first introducing ERAS. ## We needed to backfill classifications into ERAS db. ## The script was preluded with backfll_classifications.py which does a straight copy from panoptes db to copy to eras db. +## There was too much data to do a straight copy from panoptes db to copy to eras db, so we had to chunk in files. ## See PR: https://github.com/zooniverse/eras/pull/40 ## diff --git a/scripts/save_classifications_chunk_in_files.py b/scripts/save_classifications_chunk_in_files.py index 5e286f7..fec9572 100644 --- a/scripts/save_classifications_chunk_in_files.py +++ b/scripts/save_classifications_chunk_in_files.py @@ -2,6 +2,7 @@ ## This script along with copy_classifications_from_files.py was used in VM when first introducing ERAS. ## We needed to backfill classifications into ERAS db. ## The script was preluded with backfll_classifications.py which does a straight copy from panoptes db to copy to eras db. +## There was too much data to do a straight copy from panoptes db to copy to eras db, so we had to chunk in files. 
## See PR: https://github.com/zooniverse/eras/pull/40 ## From 2827e81af1be009d1b1287fcb9b2f1c2ca7d4ff0 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 14:19:19 -0500 Subject: [PATCH 03/17] update backfill script in ruby --- scripts/panoptes_membership_client.rb | 50 +++++++++++++ ...roup_membership_classification_backfill.py | 4 +- ...roup_membership_classification_backfill.rb | 72 +++++++++++++++++++ 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 scripts/panoptes_membership_client.rb create mode 100644 scripts/user_group_membership_classification_backfill.rb diff --git a/scripts/panoptes_membership_client.rb b/scripts/panoptes_membership_client.rb new file mode 100644 index 0000000..5108831 --- /dev/null +++ b/scripts/panoptes_membership_client.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +require 'pg' + +class PanoptesMembershipClient + def user_ids_not_in_user_group(user_group_id, domain_formats) + conn.exec( + "SELECT id FROM users + WHERE email ILIKE ANY(STRING_TO_ARRAY('#{domain_formats.join(',')}', ',')) + AND id NOT IN (SELECT user_id FROM memberships where user_group_id=#{user_group_id}) + " + ).entries.map { |res| res['id'].to_i } + end + + def insert_memberships(user_group_id, user_ids) + memberships_to_create = user_memberships(user_group_id, user_ids) + + member_creation_sql_query = memberships_insert_query(memberships_to_create) + + conn.exec_params(member_creation_sql_query, memberships_to_create.flatten) + end + + private + + def conn + @conn ||= PG.connect(ENV.fetch('PANOPTES_DB_URI'), sslmode: 'require') + end + + def user_memberships(user_group_id, user_ids) + memberships_to_create = [] + user_ids.each do |user_id| + # membership in array order: user_id, user_group_id, state, roles + membership = [ + user_id, + user_group_id, + 0, + '{"group_member"}' + ] + memberships_to_create << membership + end + memberships_to_create + end + + def memberships_insert_query(memberships_to_create) + # Values is 
part of sql query will look like ($1, $2, $3, $4), ($5, $6, $7, $8), ..etc.. + values = memberships_to_create.length.times.map { |i| "($#{(4 * i) + 1}, $#{(4 * i) + 2}, $#{(4 * i) + 3}, $#{(4 * i) + 4})" }.join(',') + + "INSERT INTO memberships (user_id, user_group_id, state, roles) VALUES #{values}" + end +end diff --git a/scripts/user_group_membership_classification_backfill.py b/scripts/user_group_membership_classification_backfill.py index 9b26a50..8db8191 100644 --- a/scripts/user_group_membership_classification_backfill.py +++ b/scripts/user_group_membership_classification_backfill.py @@ -18,7 +18,7 @@ current_time = now.strftime("%H:%M:%S") print("BEFORE Time =", current_time) - +sc parser = argparse.ArgumentParser() parser.add_argument("-ug", "--user_group_id", type=int) parser.add_argument('email_domain_formats') @@ -47,7 +47,7 @@ panoptes_db_conn.commit() # eras get classification_events of not_in_group_yet_user_ids that does not have user_group_id within their user_group_ids classification_event - eras_cursor.execute("SELECT classification_id, event_time, session_time, project_id, user_id, workflow_id, created_at, updated_at, user_group_ids from classification_events WHERE user_id = ANY(%s) AND %s!=ANY(user_group_ids)", (not_in_group_yet_user_ids, user_group_id)) + eras_cursor.execute("SELECT classification_id, event_time, session_time, project_id, user_id, workflow_id, created_at, updated_at, user_group_ids from classification_events WHERE user_id IN %s AND %s!=ANY(user_group_ids)", (not_in_group_yet_user_ids, user_group_id)) classification_events_to_backfill = eras_cursor.fetchall() # create classification_user_group diff --git a/scripts/user_group_membership_classification_backfill.rb b/scripts/user_group_membership_classification_backfill.rb new file mode 100644 index 0000000..f5597a0 --- /dev/null +++ b/scripts/user_group_membership_classification_backfill.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: true + +require '../config/environment' +require 
'./panoptes_membership_client' +require 'json' + +corporate_user_groups_str = ENV.fetch('CORPORATE_PARTNERS') +corporate_partners = JSON.parse(corporate_user_groups_str) + +puts 'Starting Classification and Membership Backfill...' + +panoptes_client = PanoptesMembershipClient.new + +corporate_partners.each do |corporate_partner| + puts "Geting Ids of users that are not in group yet for #{corporate_partner['corp_name']}..." + not_yet_member_user_ids = panoptes_client.user_ids_not_in_user_group(corporate_partner['user_group_id'], corporate_partner['domain_formats']) + + puts "Query found #{not_yet_member_user_ids.length} users not in the #{corporate_partner['corp_name']} user_group..." + + next unless not_yet_member_user_ids.length.positive? + + puts "Creating Memberships for #{corporate_partner['corp_name']}..." + panoptes_client.insert_memberships(corporate_partner['user_group_id'], not_yet_member_user_ids) + + puts 'Querying Eras ClassificationEvents of newly created members...' + classification_events_to_backfill = ClassificationEvent.where('user_id IN (?)', not_yet_member_user_ids) + + next unless classification_events_to_backfill.length.positive? + + puts 'Creating Classification User Groups...' + classification_user_groups_to_create = [] + classification_events_to_backfill.each do |classification| + classification_user_group = { + classification_id: classification.classification_id, + event_time: classification.event_time, + project_id: classification.project_id, + workflow_id: classification.workflow_id, + user_id: classification.user_id, + session_time: classification.session_time, + user_group_id: corporate_partner['user_group_id'] + } + classification_user_groups_to_create << classification_user_group + end + + ClassificationUserGroup.upsert_all(classification_user_groups_to_create, + unique_by: %i[classification_id event_time user_group_id user_id]) + + puts 'ClassificationUserGroup Upsert Finished...' 
+end + +today = Date.today.to_s +two_days_ago = (Date.today - 2).to_s +puts 'Classification and Membership Backfill Finished. Starting CA Refresh...' +puts 'Refreshing Continuous Aggregates dealing with User Groups...' + +puts 'Refreshing Daily Group Classifications Count And Time...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time', '#{two_days_ago}', '#{today}')") + +puts 'Refreshing Daily Group Classifications Count And Time Per Project...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_project', '#{two_days_ago}', '#{today}')") + +puts 'Refreshing Daily Group Classifications Count And Time Per User...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_user', '#{two_days_ago}', '#{today}')") + +puts 'Refreshing Daily Group Classifications Count And Time Per User And Project...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_user_per_project', '#{two_days_ago}', '#{today}')") + +puts 'Refreshing Daily Group Classifications Count And Time Per User And Workflow...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_user_per_workflow', '#{two_days_ago}', '#{today}')") + +puts 'Refreshing Daily Group Classifications Count And Time Per Workflow...' +ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_workflow', '#{two_days_ago}', '#{today}')") From 4b8f5eae83ff87b24196ff111aa4c07dda51bef8 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 14:45:18 -0500 Subject: [PATCH 04/17] update env variables to use Rails.app.credentials. 
attempt at cron job kubernetes template --- config/credentials/production.yml.enc | 2 +- kubernetes/cron_sync.yml | 27 +++++++++++++++++++ scripts/panoptes_membership_client.rb | 3 ++- ...roup_membership_classification_backfill.rb | 4 ++- 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 kubernetes/cron_sync.yml diff --git a/config/credentials/production.yml.enc b/config/credentials/production.yml.enc index 9e0b707..d5936ab 100644 --- a/config/credentials/production.yml.enc +++ b/config/credentials/production.yml.enc @@ -1 +1 @@ -lCfkIMPH/fwl6Cf1K+EWfeiHK/ITstJhGzRXMTS7XqvUG2odRAmlIlbRDibfLoGPj7kPMJzwvnpHgZN2ey09RS1dp3ZzrorY6JC3gnnsrB9rX0+dLZihtj+Tlvtzwd8H4Yv3OM3um0jWr9DD3sOq4N3a7kSm1Wqkr+UqqGrWLusESvEeDV/c6sJkjjI8q0Sbv34Mc3inSOZVcIk1DocxneYA1dSWDOWqbhF1vd2p53ubqGJIq3RPQEt7881IECek3iDuBXPPVaw6H2KaerBv0utnMhmST3mcNtAeAEzWcw/j8TztMqvaFowmp5+lo0jNJR4wNOUQaT7XiOtRFZfA6i9T0gUtqAz+gZxJshRarYpbKUb703PvQ4WAsh2Fs9g+v0PPuW2sSK6tfEzn84c9EQUCeQVKTeATsQlXAGuO9vYdKTw0ahnJzdFfql5LXUH7VJ+12cNbOanjVFjykcs3qY2N69PK1/ESAPqkyK8Fe0fosfAepUVm3rd7VLNMc2qGdWAxTAfdj59xYNaGU97+xRSATBMUdsGNoFcyv7WTUUxcn01Qz9WqyLdCzyIABEwvcXWtc3pNJ3U7bwC0fsqQkL+zqt883eYGDxU4Mn0H5IX21OfqTYDuPB3mQltuQe2KUq2KZcCLtBVce0nRGKx82dEhnjP+PRWk7moCsG8eyNqqi1i1ZqZcDemPTXL48+IgCLfmMYju7uDWTM5OWG9e+TWEqUowwUV+Gp5AoGKUt/DtfityIQmdJde7+6YaxYDsDEKZfP6na4C2Zyt6qXtCWYIBXo8sEuEE7j38lrWFgNODc8k53PgSBp6R0lPjzw5/U9HPzBKmkn705JdfRN5kYbKVpCPOQqMpjYU1jSWpqdO4ZgobEWT5n3XtHCuV6oO/P5t2lbwm+mVuVotrjv3p7/x+CbN+nTvA9Kma7mOdK8FMrQlqXD/FpnSvSLuO5/3Q7LltdKF3njmmUmsqFnbpXOaxKpgDb+ijeuwndEZ1hvyLepcxahBevbOS+ZOtdWHmfGhTlZ4uCgbgAXJ04FH8xFrf1/VG4GtMHuapT6tlJcxqBer4--8e7ThvhXjefdpI+0--QG+VUWhyp9cydSvpzydc0w== \ No newline at end of file 
+hD5l4bBd4gVw4DS/oeGO3XDXcwPUCSVCt+TFbmQsy+Hx4mBSTLF22nRljM/KDXPSU2b2RqbOl3AcDTeZWlNHqK0OUL68CPjrOWK0KFOqKPgqsh+ydEcjpCWkm1A5mQdxCO4GQ2zjZwZxeQvZbvGiPz1sc4WpVd5zILUcYeiK2P8lLqcLdSHroQbG9+L/0VT8d+0B1mNsdJ339S3cI0y+4Cp9evwi3l7fjIRpHN10tPFJJkuKd90syGOif1tDvtScl1bfkK3f7xmNL2lzDBMEGq2BzhFte/QZmBKYnHEeAolmMthLMQqQdZTftGUrxaZeh4OtzcKwerox81ZmUjVkubTGJEsSrB2Q8iR5RMZNscmjbzem6YEiT4mef7HeAECasZG5IbwHAMMh+s/BnKVWUlS0Srit5Zt40Judyde1K1UgjxsCsqWJS4JYcfhkmBzPi7x1paxGlWcezsEAJZFqjiqzIzvc4cvDoyCaEO1FMfbvOlHWgTH5A2xV2xnCiwE2DgkrVR9a9vaCkwxAdNV44Esq8npeVkytF39khDuaWWsn2dh+3j6vcnzGyI2gbIgyYAv7aV0+JEPBGRlPHHhVt3nUzYQUHaJHUL+yhjkIuMKxIilkOig+RhZDjdujR+kZdqP8KyVXuAg8Ui/h+/kG2XMtQR8YhHIQZFhZk9TVQQK2IC7PKmqJEo/V8zh53q37vaODthUNzACp9ww7UsZ6/35oconmuVpa1Ucijahw5Fiv1WEGG2oI0QPEhEQToG5dWHi2DbYwmQe1d/deG/CdFKMhdGaCGeOC/fZWTtUqKfDp70eytfic8I1jwsK1p3smO3/PZYNXwo78VXaZVKepqhp8IgVxPoNIY4YtmaEpigcHMmm409quT7/mX3fTpdGqcUPBVxx94w5xYVACmStGIGP2AesIgfdbxz0HzDf1aNMj8iEZuyo1gQ4rUiD6YygbzdDXOjuVa3Aq8ZvO3ixkJWFB65w6+KsLmt93zodHndH4N62PnmUw6eAJCmIxbBaJqU1kim8IQd0zXv5Bx3hMq7GVulMZccsIzZoCWDACJic+jsYM3Rp7UBP1WYsGpfOSWC14PXQGVwmXKhG3hn3a/Lim/nkMKUclx1Hy2W+qpDPKDl27uQPxF9wt4ZwkAyBdKXgDCpq11GPI2Kkj5Zvhrx+labnulixV6HfZpQJdcVk11Rqf5O8l6v/AHjSaQ6qrEztNLZL/IE5cqeWiZZZOIxUQuYVa6aldsEH9+niHzD6/U6JvHVP0dRj5PNF0TepNtvFkzUKZI+fTG6R6OYyMSBcOOW7i6brSCNM1I+Yx3+F7eACovJQhl9spb0YEbxvENYeHStlWYUnxFlqslbPHIikB/2J1BMzpscVC7AR2nJgBifoGcrMX2w9cCH2SgGW2HFkr21b51YuXQo99vBrk0dUudDC7s1f4bnudtaLZOUjrCcY0DudT8RUcDrVfJpkxOvGgcI6zxIzoXLu9SW3bS27zJWfhhZxXqljfczHgXThFWqlQd518XUGmfxGrg9C+fP2ix6ngAaIAkywavbLvfltYe6mkJGrTIjVAns6GX3QtroLJYvvCq2i0BHRJSvISJVfzZ7E6JYLdVJS9ur3C3jrIGl4iiiVJeuwjcJA7NFlrey9aN/MR/4jwj7TlMqi+47bQGThT1nzyZmvkB2EJpO271Vw1dSbUsaj5hz5dKiypav3puMXHq7IguQv12gUaRX5ccMaNqDbyXhC0jWWa9xoFsisAEqNCNCrwosivND18fbaaNUZ2G0QuuPFjYlTiEKvXuaaip/QOmYlQ8q99arps7rn6SK8rhRX3ymhxLXyx5yIjtI+BXMvylXM05OVMWmhm7ST3p7ecqENCS0Y2cA119mjUd6TG+ulLyw4l3UGIBAwLrUcraNvpiYNUQcx+y07OfHAjct4lIdseaCqnqPB37W4k9p6aYo+CRlaE/uKtckdkBCtlHxxeoREyQyepTgtI2uq+i+UEeYm
qQrHIAEsT+C76cSaeuwINRYQ6z6AjrmvgvQ4LjteyV2ky1M7ckBGZMnh1mLcB6jN0zEsqwgs74yYeZnmEVN0nwOc+3M9OHwZEjIIbgedHBXhOF/y+Aa9dSomYaHrmVDzrRvUmF2z0gqNQAcbL7TVo9FmwLxBYUFqORXLlf0B6An5vpDzEslw3sKSx4+hbclYDAGKsLuPPeFD1O2D3v+WluUqwCSdkVXNNnPD0rZUT/8jVVSBDb11Vvbtl603cTaZJcRujvynJllx1jky3RiVuTkG7HBGTLIozy9Na6eQzATBZazRQC7cVobySMsyf4CKtHcAH97Vl5fSrYcBtTkgTbWrZrVfMJwT6ufnSyDXHkOXWhi37J9J5Ig/w6c4G1yQcjOa12jsHsWhRTNt8XY6b9r2vW1kqA2+bbtW9Z8o8NcskNCoPKOKzeRlEJ2cPvAFV5Fh+H9FZN6UpCxMDEuggBjAB/J3EYjFG6XEVqEOhfXMVOmRJ9ywsp7whxtk1F5kxhRDXomOsCxydd3gGAyIdwF0NHIptjaa7DIrxbCB+u+BtmytZzE00QwoeffVmLhoa8JyTMHGdK7ion+bfuFnEiP8/m1O8uvFesnfDzWp6+/F8Au4m2GACcZcjn0TsoNW7iYy6xMXn32X7rKcs12baJFbLsH4xJfdOPM0RlH16E79n6g8p7uUb8RSzrJ0cUsLC5dRVhw2hQWNbzQudEMtciIvvfGAkDBpUcpu/4ZkmS8RdblmJ+BDT0yFKddWdD217EYzrPbrpXXomPU95kEq9z9i0EKA5e8hjGt1roAgGh51/sezhrLVGmAeIPodxbxbWOWH3UTt+HUP5QpnRq8k2qILk4FuiFRlHUTNLvVtSdk1Z8XybLwmYMdoGTSQjD1mSmVSouA==--UPEZvym9PPXcBjaM--xrk0OyBCCPOBgaRp2gjxIg== \ No newline at end of file diff --git a/kubernetes/cron_sync.yml b/kubernetes/cron_sync.yml new file mode 100644 index 0000000..b205b31 --- /dev/null +++ b/kubernetes/cron_sync.yml @@ -0,0 +1,27 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: eras-corp-partners-cron-sync-job +spec: + schedule: "0 3 * * *" + jobTemplate: + spec: + template: + metadata: + name: eras-corp-partners-sync + spec: + containers: + - name: eras-corp-partners-sync + image: ghcr.io/zooniverse/eras + env: + - name: RAILS_LOG_TO_STDOUT + value: "true" + - name: RAILS_ENV + value: production + - name: RAILS_MASTER_KEY + valueFrom: + secretKeyRef: + name: eras-production + key: rails-master-key + restartPolicy: Never + backoffLimit: 2 \ No newline at end of file diff --git a/scripts/panoptes_membership_client.rb b/scripts/panoptes_membership_client.rb index 5108831..7e284d0 100644 --- a/scripts/panoptes_membership_client.rb +++ b/scripts/panoptes_membership_client.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require 'pg' +require 
'../config/environment' class PanoptesMembershipClient def user_ids_not_in_user_group(user_group_id, domain_formats) @@ -23,7 +24,7 @@ def insert_memberships(user_group_id, user_ids) private def conn - @conn ||= PG.connect(ENV.fetch('PANOPTES_DB_URI'), sslmode: 'require') + @conn ||= PG.connect(Rails.application.credentials.panoptes_db_uri, sslmode: 'require') end def user_memberships(user_group_id, user_ids) diff --git a/scripts/user_group_membership_classification_backfill.rb b/scripts/user_group_membership_classification_backfill.rb index f5597a0..bd9b2fd 100644 --- a/scripts/user_group_membership_classification_backfill.rb +++ b/scripts/user_group_membership_classification_backfill.rb @@ -4,7 +4,7 @@ require './panoptes_membership_client' require 'json' -corporate_user_groups_str = ENV.fetch('CORPORATE_PARTNERS') +corporate_user_groups_str = Rails.application.credentials.corporate_user_groups corporate_partners = JSON.parse(corporate_user_groups_str) puts 'Starting Classification and Membership Backfill...' @@ -70,3 +70,5 @@ puts 'Refreshing Daily Group Classifications Count And Time Per Workflow...' 
ActiveRecord::Base.connection.exec_query("CALL refresh_continuous_aggregate('daily_group_classification_count_and_time_per_workflow', '#{two_days_ago}', '#{today}')") + +puts 'Stats User Group Membership and Classification Backfill Completed' From f29f6107fa169a2d03112dbf2c63658d6fedd32a Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 14:52:13 -0500 Subject: [PATCH 05/17] Update cron_sync.yml --- kubernetes/cron_sync.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/kubernetes/cron_sync.yml b/kubernetes/cron_sync.yml index b205b31..9bea41a 100644 --- a/kubernetes/cron_sync.yml +++ b/kubernetes/cron_sync.yml @@ -23,5 +23,6 @@ spec: secretKeyRef: name: eras-production key: rails-master-key + command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] restartPolicy: Never backoffLimit: 2 \ No newline at end of file From 0d4e2a6f195270500e5a9abf374e18f4eab89042 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 15:00:46 -0500 Subject: [PATCH 06/17] Update user_group_membership_classification_backfill.py --- scripts/user_group_membership_classification_backfill.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/user_group_membership_classification_backfill.py b/scripts/user_group_membership_classification_backfill.py index 8db8191..647bf56 100644 --- a/scripts/user_group_membership_classification_backfill.py +++ b/scripts/user_group_membership_classification_backfill.py @@ -18,7 +18,6 @@ current_time = now.strftime("%H:%M:%S") print("BEFORE Time =", current_time) -sc parser = argparse.ArgumentParser() parser.add_argument("-ug", "--user_group_id", type=int) parser.add_argument('email_domain_formats') From 2c335adae91d565172904cc247c1a878ee4bf66d Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 15:16:02 -0500 Subject: [PATCH 07/17] update panoptes_membership_client.rb to create insert query using array.new vs times.map per hound --- scripts/panoptes_membership_client.rb | 5 +++-- 1 
file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/panoptes_membership_client.rb b/scripts/panoptes_membership_client.rb index 7e284d0..497eac4 100644 --- a/scripts/panoptes_membership_client.rb +++ b/scripts/panoptes_membership_client.rb @@ -44,8 +44,9 @@ def user_memberships(user_group_id, user_ids) def memberships_insert_query(memberships_to_create) # Values is part of sql query will look like ($1, $2, $3, $4), ($5, $6, $7, $8), ..etc.. - values = memberships_to_create.length.times.map { |i| "($#{(4 * i) + 1}, $#{(4 * i) + 2}, $#{(4 * i) + 3}, $#{(4 * i) + 4})" }.join(',') - + values = Array.new(memberships_to_create.length) do |i| + "($#{(4 * i) + 1}, $#{(4 * i) + 2}, $#{(4 * i) + 3}, $#{(4 * i) + 4})" + end.join(',') "INSERT INTO memberships (user_id, user_group_id, state, roles) VALUES #{values}" end end From bf1260720412a563b68b0db8df6b954ea43642cb Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Wed, 10 Apr 2024 15:25:08 -0500 Subject: [PATCH 08/17] adding a more descriptive comment insert query --- scripts/panoptes_membership_client.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/panoptes_membership_client.rb b/scripts/panoptes_membership_client.rb index 497eac4..8a01207 100644 --- a/scripts/panoptes_membership_client.rb +++ b/scripts/panoptes_membership_client.rb @@ -43,7 +43,7 @@ def user_memberships(user_group_id, user_ids) end def memberships_insert_query(memberships_to_create) - # Values is part of sql query will look like ($1, $2, $3, $4), ($5, $6, $7, $8), ..etc.. + # Values is a string that will look like ($1, $2, $3, $4), ($5, $6, $7, $8), ..etc.. 
values = Array.new(memberships_to_create.length) do |i| "($#{(4 * i) + 1}, $#{(4 * i) + 2}, $#{(4 * i) + 3}, $#{(4 * i) + 4})" end.join(',') From f06f1879ac14eae2f38b41bf9b2df4a2c0a4c787 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Mon, 15 Apr 2024 15:29:46 -0500 Subject: [PATCH 09/17] add manual sync --- .../workflows/manual_corp_user_group_sync.yml | 31 +++++++++++++++++++ ...ync.yml => corp_user_groups_cron_sync.yml} | 2 +- kubernetes/manual_corp_user_group_sync.yml | 25 +++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/manual_corp_user_group_sync.yml rename kubernetes/{cron_sync.yml => corp_user_groups_cron_sync.yml} (97%) create mode 100644 kubernetes/manual_corp_user_group_sync.yml diff --git a/.github/workflows/manual_corp_user_group_sync.yml b/.github/workflows/manual_corp_user_group_sync.yml new file mode 100644 index 0000000..1ef2838 --- /dev/null +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -0,0 +1,31 @@ +name: Manually Sync Eras Corporate User Group Sync + +on: + workflow_dispatch: + +jobs: + manual_sync: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3.5.2 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - uses: azure/login@v1 + with: + creds: ${{ secrets.AZURE_AKS }} + + - name: Set the target AKS cluster + uses: Azure/aks-set-context@v3 + with: + cluster-name: microservices + resource-group: kubernetes + + - name: Modify & apply template + run: kubectl create -f kubernetes/manual_corp_user_group_sync.yml \ No newline at end of file diff --git a/kubernetes/cron_sync.yml b/kubernetes/corp_user_groups_cron_sync.yml similarity index 97% rename from kubernetes/cron_sync.yml rename to kubernetes/corp_user_groups_cron_sync.yml index 9bea41a..e90b276 100644 --- a/kubernetes/cron_sync.yml +++ 
b/kubernetes/corp_user_groups_cron_sync.yml @@ -25,4 +25,4 @@ spec: key: rails-master-key command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] restartPolicy: Never - backoffLimit: 2 \ No newline at end of file + backoffLimit: 2 diff --git a/kubernetes/manual_corp_user_group_sync.yml b/kubernetes/manual_corp_user_group_sync.yml new file mode 100644 index 0000000..724b6bd --- /dev/null +++ b/kubernetes/manual_corp_user_group_sync.yml @@ -0,0 +1,25 @@ +apiVersion: batch/v1 +kind: Job +metadata: + generateName: eras-corp-partners-sync- +spec: + template: + metadata: + name: eras-corp-partners-sync + spec: + containers: + - name: eras-corp-partners-sync + image: ghcr.io/zooniverse/eras + env: + - name: RAILS_LOG_TO_STDOUT + value: "true" + - name: RAILS_ENV + value: production + - name: RAILS_MASTER_KEY + valueFrom: + secretKeyRef: + name: eras-production + key: rails-master-key + command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] + restartPolicy: Never + backoffLimit: 2 From fe179a92939d54b4b94cc61b1c27d18b8a88ae32 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 18 Apr 2024 11:26:07 -0500 Subject: [PATCH 10/17] Update .github/workflows/manual_corp_user_group_sync.yml Co-authored-by: Zach Wolfenbarger --- .github/workflows/manual_corp_user_group_sync.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/manual_corp_user_group_sync.yml b/.github/workflows/manual_corp_user_group_sync.yml index 1ef2838..15fe495 100644 --- a/.github/workflows/manual_corp_user_group_sync.yml +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -28,4 +28,5 @@ jobs: resource-group: kubernetes - name: Modify & apply template - run: kubectl create -f kubernetes/manual_corp_user_group_sync.yml \ No newline at end of file + run: kubectl create -f kubernetes/manual_corp_user_group_sync.yml + \ No newline at end of file From a5ed815ee9c239509a27b2e639157e9433c0ff4e Mon Sep 17 00:00:00 2001 From: 
yuenmichelle1 Date: Thu, 18 Apr 2024 11:26:20 -0500 Subject: [PATCH 11/17] Update .github/workflows/manual_corp_user_group_sync.yml Co-authored-by: Zach Wolfenbarger --- .github/workflows/manual_corp_user_group_sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual_corp_user_group_sync.yml b/.github/workflows/manual_corp_user_group_sync.yml index 15fe495..2f36125 100644 --- a/.github/workflows/manual_corp_user_group_sync.yml +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4.1.1 - name: Login to GitHub Container Registry uses: docker/login-action@v2 From 83739dfd88454be8e7f46d986b028460b977c437 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 18 Apr 2024 11:26:33 -0500 Subject: [PATCH 12/17] Update .github/workflows/manual_corp_user_group_sync.yml Co-authored-by: Zach Wolfenbarger --- .github/workflows/manual_corp_user_group_sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual_corp_user_group_sync.yml b/.github/workflows/manual_corp_user_group_sync.yml index 2f36125..468b44e 100644 --- a/.github/workflows/manual_corp_user_group_sync.yml +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -11,7 +11,7 @@ jobs: uses: actions/checkout@v4.1.1 - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3.1.0 with: registry: ghcr.io username: ${{ github.actor }} From 85453a9f34138ae29618b9d90a7e58812c1abd1f Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 18 Apr 2024 11:27:45 -0500 Subject: [PATCH 13/17] Update .github/workflows/manual_corp_user_group_sync.yml Co-authored-by: Zach Wolfenbarger --- .github/workflows/manual_corp_user_group_sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual_corp_user_group_sync.yml 
b/.github/workflows/manual_corp_user_group_sync.yml index 468b44e..3a73b18 100644 --- a/.github/workflows/manual_corp_user_group_sync.yml +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -17,7 +17,7 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - uses: azure/login@v1 + - uses: azure/login@v2 with: creds: ${{ secrets.AZURE_AKS }} From 90d15c512d8813f7044207d9d6dd2184d1186c76 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Thu, 18 Apr 2024 11:27:53 -0500 Subject: [PATCH 14/17] Update .github/workflows/manual_corp_user_group_sync.yml Co-authored-by: Zach Wolfenbarger --- .github/workflows/manual_corp_user_group_sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/manual_corp_user_group_sync.yml b/.github/workflows/manual_corp_user_group_sync.yml index 3a73b18..a292d45 100644 --- a/.github/workflows/manual_corp_user_group_sync.yml +++ b/.github/workflows/manual_corp_user_group_sync.yml @@ -22,7 +22,7 @@ jobs: creds: ${{ secrets.AZURE_AKS }} - name: Set the target AKS cluster - uses: Azure/aks-set-context@v3 + uses: Azure/aks-set-context@v4.0.0 with: cluster-name: microservices resource-group: kubernetes From 9d335c3224cac23211de04152598abf8f0d7787c Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Fri, 19 Apr 2024 10:44:28 -0500 Subject: [PATCH 15/17] Update manual_corp_user_group_sync.yml --- kubernetes/manual_corp_user_group_sync.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/manual_corp_user_group_sync.yml b/kubernetes/manual_corp_user_group_sync.yml index 724b6bd..97b284d 100644 --- a/kubernetes/manual_corp_user_group_sync.yml +++ b/kubernetes/manual_corp_user_group_sync.yml @@ -22,4 +22,4 @@ spec: key: rails-master-key command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] restartPolicy: Never - backoffLimit: 2 + backoffLimit: 2 From 63db1939e5bf0b65baa16c52011b8fef73757296 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 
Date: Fri, 19 Apr 2024 12:29:31 -0500 Subject: [PATCH 16/17] Update manual_corp_user_group_sync.yml --- kubernetes/manual_corp_user_group_sync.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kubernetes/manual_corp_user_group_sync.yml b/kubernetes/manual_corp_user_group_sync.yml index 97b284d..df55c2a 100644 --- a/kubernetes/manual_corp_user_group_sync.yml +++ b/kubernetes/manual_corp_user_group_sync.yml @@ -20,6 +20,9 @@ spec: secretKeyRef: name: eras-production key: rails-master-key - command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] + command: + - /bin/sh + - -c + - cd scripts; ruby user_group_membership_classification_backfill.rb restartPolicy: Never backoffLimit: 2 From 47c6cacc51382e768e0ac6419aac79e683275748 Mon Sep 17 00:00:00 2001 From: yuenmichelle1 Date: Fri, 19 Apr 2024 12:43:47 -0500 Subject: [PATCH 17/17] update cron yml to properly pull config --- kubernetes/corp_user_groups_cron_sync.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kubernetes/corp_user_groups_cron_sync.yml b/kubernetes/corp_user_groups_cron_sync.yml index e90b276..aec2ffb 100644 --- a/kubernetes/corp_user_groups_cron_sync.yml +++ b/kubernetes/corp_user_groups_cron_sync.yml @@ -23,6 +23,9 @@ spec: secretKeyRef: name: eras-production key: rails-master-key - command: ['ruby', './scripts/user_group_membership_classification_backfill.rb'] + command: + - /bin/sh + - -c + - cd scripts; ruby user_group_membership_classification_backfill.rb restartPolicy: Never backoffLimit: 2