From a274094df8b84339a249d4d3a44a83f0d1bfd183 Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 6 Jan 2025 14:17:03 -0500 Subject: [PATCH] Remove some mongo-specific one-time use things --- lib/keio_updater.rb | 40 ----------- lib/phctl.rb | 5 -- lib/shared_print/phase_updater.rb | 52 -------------- spec/keio_updater_spec.rb | 90 ------------------------- spec/shared_print/phase_updater_spec.rb | 42 ------------ 5 files changed, 229 deletions(-) delete mode 100644 lib/keio_updater.rb delete mode 100644 lib/shared_print/phase_updater.rb delete mode 100644 spec/keio_updater_spec.rb delete mode 100644 spec/shared_print/phase_updater_spec.rb diff --git a/lib/keio_updater.rb b/lib/keio_updater.rb deleted file mode 100644 index 07668f25..00000000 --- a/lib/keio_updater.rb +++ /dev/null @@ -1,40 +0,0 @@ -require "cluster" -require "services" - -class KeioUpdater - def initialize(limit = nil) - raise "not implemented" - @limit = limit - end - - def limit_query(query) - if @limit.nil? - Cluster.where(**query) - else - Cluster.where(**query).limit(@limit) - end.no_timeout - end - - def run - query = { - "ht_items.0": {"$exists": 1}, - "ht_items.collection_code": "KEIO", - "ht_items.billing_entity": "hathitrust" - } - - limit_query(query).each do |cluster| - cluster.ht_items.each do |ht_item| - if ht_item.collection_code == "KEIO" && ht_item.billing_entity == "hathitrust" - ht_item.billing_entity = "keio" - Services.logger.info "Set billing_entity=keio on ocns:#{cluster.ocns}, item_id:#{ht_item.item_id}" - end - end - cluster.save - end - end -end - -if __FILE__ == $0 - limit = ARGV.shift - KeioUpdater.new(limit).run -end diff --git a/lib/phctl.rb b/lib/phctl.rb index 4e0152f7..a1eef066 100644 --- a/lib/phctl.rb +++ b/lib/phctl.rb @@ -92,11 +92,6 @@ def replace(infile) def deprecate(*infile) run_job(Jobs::SharedPrintOps::Deprecate, options[:verbose], [*infile]) end - - desc "phase3load INFILE", "Load Phase 3 commitments, if valid, from file" - def phase3load(infile) - run_common_job(SharedPrint::Phase3Validator, options, infile) - end end class Report < JobCommand diff --git a/lib/shared_print/phase_updater.rb b/lib/shared_print/phase_updater.rb deleted file mode 100644 index 0d8afb36..00000000 --- a/lib/shared_print/phase_updater.rb +++ /dev/null @@ -1,52 +0,0 @@ -# frozen_string_literal: true - -require "date" - -# This is an outer wrapper for a MongoUpdater call. -# Objective: based on commitments.committed_date, set commitments.phase. -# Usage: bundle exec ruby get_by_date.rb -# E.g. : bundle exec ruby get_by_date.rb "2023-01-31 00:00:00 UTC" 3 -module SharedPrint - class PhaseUpdater - def initialize(date, phase) - raise "not implemented" - # Get input - @date = date - @phase = phase - - validate! - puts "Get commitments with committed_date #{@date}." - puts "Set phase to #{@phase}." - end - - # Make sure date and phase look like they should. - def validate! - date_rx = /^\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\s[A-Z]{3}$/ - raise ArgumentError, "bad date: #{@date}" unless date_rx.match?(@date) - - @phase = @phase.to_i - raise ArgumentError, "bad phase: #{@phase}" unless [0, 1, 2, 3].include?(@phase) - rescue ArgumentError => e - puts "ERROR: Failed validation: #{e.message}" - exit - end - - # Pass on call to MongoUpdater which does all the lifting. - def run - puts "Started: #{Time.now.utc}" - res = MongoUpdater.update_embedded( - clusterable: "commitments", - matcher: {committed_date: DateTime.parse(@date)}, - updater: {phase: @phase} - ) - puts res.inspect - puts "Finished: #{Time.now.utc}" - end - end -end - -if $0 == __FILE__ - date = ARGV.shift - phase = ARGV.shift - SharedPrint::PhaseUpdater.new(date, phase).run -end diff --git a/spec/keio_updater_spec.rb b/spec/keio_updater_spec.rb deleted file mode 100644 index b9424a7a..00000000 --- a/spec/keio_updater_spec.rb +++ /dev/null @@ -1,90 +0,0 @@ -# frozen_string_literal: true - -require "spec_helper" -require "keio_updater" - -RSpec.xdescribe KeioUpdater do - let(:lil_k) { "keio" } - let(:big_k) { "KEIO" } - let(:hword) { "hathitrust" } - let(:upenn) { "upenn" } - - before(:each) do - Cluster.collection.find.delete_many - end - - def build_cluster(ocn) - ht_item = build(:ht_item, ocns: [ocn], collection_code: big_k, billing_entity: hword) - ht_item2 = build(:ht_item, ocns: [ocn], collection_code: "PU", billing_entity: upenn) - create(:cluster, ocns: [ocn]) - cluster_tap_save(ht_item, ht_item2) - end - - def get_all - Cluster.where.to_a - end - - def count_billing_entity(clusters, billing_entity) - clusters.map { |x| x.ht_items.map(&:billing_entity) }.flatten.count(billing_entity) - end - - def run_clusters(max_ocn, limit = nil) - # Setup - 1.upto(max_ocn).each do |ocn| - build_cluster(ocn) - end - - if max_ocn > 100 # Or the count may not be correct >:( - puts "zzz" - sleep 3 - puts "huh?! wha!?" - end - - # I'm using be_within(x).of(y) because on my computer the bigger tests tend to - # be off by 1-2 when max_ocn ~ 1000. I think it is entirely a timing issue. - - # Pre-check - all = get_all - count_hword = count_billing_entity(all, hword) - count_lil_k = count_billing_entity(all, lil_k) - count_upenn = count_billing_entity(all, upenn) - - expect(all.size).to eq max_ocn - expect(count_hword).to be_within(3).of(max_ocn) # Flip these. - expect(count_lil_k).to be_within(3).of(0) # Flip these. - expect(count_upenn).to be_within(3).of(max_ocn) # Not these. - - # Action - described_class.new(limit).run - - # Post-check - all = get_all - count_hword = count_billing_entity(all, hword) - count_lil_k = count_billing_entity(all, lil_k) - count_upenn = count_billing_entity(all, upenn) - - expect(all.size).to eq max_ocn - if limit.nil? - expect(count_hword).to be_within(3).of(0) # These flipped. - expect(count_lil_k).to be_within(3).of(max_ocn) # These flipped. - else - expect(all.size).to eq max_ocn - expect(count_hword).to be_within(3).of(max_ocn - limit) # These flipped. - expect(count_lil_k).to be_within(3).of(limit) # These flipped. - end - expect(count_upenn).to be_within(3).of(max_ocn) # Not these. - end - - it "does the thing for all of 10 items" do - run_clusters(10) - end - it "does the thing for 5 of 50 items" do - run_clusters(50, 5) - end - xit "does the thing 100 times" do - run_clusters(100) - end - xit "does the thing 1000 times" do - run_clusters(1000) - end -end diff --git a/spec/shared_print/phase_updater_spec.rb b/spec/shared_print/phase_updater_spec.rb deleted file mode 100644 index ad443eaf..00000000 --- a/spec/shared_print/phase_updater_spec.rb +++ /dev/null @@ -1,42 +0,0 @@ -require "cluster" -require "shared_print/finder" -require "shared_print/phase_updater" -require "shared_print/phases" - -RSpec.xdescribe SharedPrint::PhaseUpdater do - before(:each) do - Cluster.collection.find.delete_many - end - it "updates `phase` on commitments based on `committed_date`" do - # Make 5 commitments with a known committed_date - # and a phase that needs updating. - clusterables = [] - 1.upto(5) do |i| - clusterables << build( - :commitment, - ocn: i, - phase: SharedPrint::Phases::PHASE_0, - committed_date: SharedPrint::Phases::PHASE_1_DATE - ) - end - cluster_tap_save(*clusterables) - # Verify that we loaded what we think we loaded: - # 5 commitments with the same phase and the same date. - original_commitments = SharedPrint::Finder.new(phase: [0]).commitments.to_a - expect(original_commitments.count).to eq 5 - expect(original_commitments.map(&:phase).uniq).to eq [SharedPrint::Phases::PHASE_0] - expect(original_commitments.map(&:committed_date).uniq).to eq [SharedPrint::Phases::PHASE_1_DATE] - - # Here we want to update to phase 1, to match the phase 1 date. - phase_updater = described_class.new( - SharedPrint::Phases::PHASE_1_DATE, - SharedPrint::Phases::PHASE_1 - ) - phase_updater.run - # Verify that the commitments now have phase 1. - updated_commitments = SharedPrint::Finder.new(phase: [1]).commitments.to_a - expect(updated_commitments.count).to eq 5 - expect(updated_commitments.map(&:phase).uniq).to eq [SharedPrint::Phases::PHASE_1] - expect(updated_commitments.map(&:committed_date).uniq).to eq [SharedPrint::Phases::PHASE_1_DATE] - end -end