From a71aba412770b4f3241a235cfacc0a80874bb6fd Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Fri, 3 May 2024 16:58:41 -0400 Subject: [PATCH 1/2] DEV-1124: holdings are equal if fields are equal or both empty --- lib/clusterable/holding.rb | 7 +++++- spec/clusterable/holding_spec.rb | 39 ++++++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/lib/clusterable/holding.rb b/lib/clusterable/holding.rb index 0b68d566..d02b0aa3 100644 --- a/lib/clusterable/holding.rb +++ b/lib/clusterable/holding.rb @@ -88,7 +88,12 @@ def self.new_from_scrubbed_file_line(line) # @param other, another holding def ==(other) self.class == other.class && - (fields.keys - EQUALITY_EXCLUDED_FIELDS).all? { |attr| public_send(attr) == other.public_send(attr) } + (fields.keys - EQUALITY_EXCLUDED_FIELDS).all? do |attr| + self_attr = public_send(attr) + other_attr = other.public_send(attr) + + (self_attr == other_attr) or (self_attr.blank? and other_attr.blank?) + end end # Is true when all fields match except for _id diff --git a/spec/clusterable/holding_spec.rb b/spec/clusterable/holding_spec.rb index 6b081f67..dce0a03c 100644 --- a/spec/clusterable/holding_spec.rb +++ b/spec/clusterable/holding_spec.rb @@ -18,7 +18,7 @@ expect(c.holdings.first._parent).to be(c) end - it "normalizees enum_chron" do + it "normalizes enum_chron" do holding = build(:holding, enum_chron: "v.1 Jul 1999") expect(holding.n_enum).to eq("1") expect(holding.n_chron).to eq("Jul 1999") @@ -36,17 +36,42 @@ it "== is true if all fields match except date_received and uuid" do h2.date_received = Date.yesterday h2.uuid = SecureRandom.uuid - expect(h == h2).to be(true) + expect(h).to eq(h2) end it "== is true if all fields match including date_received" do - expect(h == h2).to be(true) + expect(h).to eq(h2) + end + + it "== is true if corresponding fields are nil vs. empty string" do + h.issn = nil + h.n_enum = nil + h.n_chron = nil + h.condition = nil + + h2.issn = "" + h2.n_enum = "" + h2.n_chron = "" + h2.condition = "" + expect(h).to eq(h2) end (described_class.fields.keys - ["date_received", "uuid", "_id"]).each do |attr| it "== is false if #{attr} doesn't match" do - h2[attr] = nil - expect(h == h2).to be(false) + # ensure attribute in h2 is different from h but + # of the same logical type + # + case h[attr] + when String + h2[attr] = "#{h[attr]}junk" + when Numeric + h2[attr] = h[attr] + 1 + when true, false, nil + h2[attr] = !h[attr] + end + + expect(h[attr]).not_to eq(h2[attr]) + expect(h).not_to eq(h2) end end end @@ -55,12 +80,12 @@ let(:h2) { h.clone } it "same_as is true if all fields match" do - expect(h.same_as?(h2)).to be(true) + expect(h).to be_same_as(h2) end it "same_as is not true if date_received does not match" do h2.date_received = Date.yesterday - expect(h.same_as?(h2)).to be(false) + expect(h).not_to be_same_as(h2) end end From cc206fafb8fba3540bcc55ca998afab36b87747c Mon Sep 17 00:00:00 2001 From: Aaron Elkiss Date: Mon, 6 May 2024 12:09:05 -0400 Subject: [PATCH 2/2] DEV-1124: Holdings have same update_key if equivalent fields are both blank The holdings matching function used update_key (a modified hash value) rather than equality; this update folds empty string & nil for computing that update_key as well. --- lib/clusterable/holding.rb | 9 ++++++-- spec/clusterable/holding_spec.rb | 30 ++++++++++++++----------- spec/clustering/cluster_holding_spec.rb | 16 +++++++++++++ 3 files changed, 40 insertions(+), 15 deletions(-) diff --git a/lib/clusterable/holding.rb b/lib/clusterable/holding.rb index d02b0aa3..c052d080 100644 --- a/lib/clusterable/holding.rb +++ b/lib/clusterable/holding.rb @@ -88,7 +88,7 @@ def self.new_from_scrubbed_file_line(line) # @param other, another holding def ==(other) self.class == other.class && - (fields.keys - EQUALITY_EXCLUDED_FIELDS).all? do |attr| + (fields.keys - EQUALITY_EXCLUDED_FIELDS).all? do |attr| self_attr = public_send(attr) other_attr = other.public_send(attr) @@ -112,7 +112,12 @@ def batch_with?(other) # Turn a holding into a hash key for quick lookup # in e.g. cluster_holding.find_old_holdings. def update_key - as_document.slice(*(fields.keys - EQUALITY_EXCLUDED_FIELDS)).hash + as_document + .slice(*(fields.keys - EQUALITY_EXCLUDED_FIELDS)) + # fold blank strings & nil to same update key, as in + # equality above + .transform_values { |f| f.blank? ? nil : f } + .hash end private diff --git a/spec/clusterable/holding_spec.rb b/spec/clusterable/holding_spec.rb index dce0a03c..dee80b38 100644 --- a/spec/clusterable/holding_spec.rb +++ b/spec/clusterable/holding_spec.rb @@ -43,24 +43,28 @@ expect(h).to eq(h2) end - it "== is true if corresponding fields are nil vs. empty string" do - h.issn = nil - h.n_enum = nil - h.n_chron = nil - h.condition = nil - - h2.issn = "" - h2.n_enum = "" - h2.n_chron = "" - h2.condition = "" - expect(h).to eq(h2) + context "when one holding has nil attrs and the other has empty string" do + before(:each) do + [:n_enum=, :n_chron=, :condition=, :issn=].each do |setter| + h.public_send(setter, "") + h2.public_send(setter, nil) + end + end + + it "== is true" do + expect(h).to eq(h2) + end + + it "update_key is the same" do + expect(h.update_key).to eq(h2.update_key) + end end (described_class.fields.keys - ["date_received", "uuid", "_id"]).each do |attr| it "== is false if #{attr} doesn't match" do - # ensure attribute in h2 is different from h but + # ensure attribute in h2 is different from h but # of the same logical type - # + case h[attr] when String h2[attr] = "#{h[attr]}junk" diff --git a/spec/clustering/cluster_holding_spec.rb b/spec/clustering/cluster_holding_spec.rb index c115b32f..d789c24f 100644 --- a/spec/clustering/cluster_holding_spec.rb +++ b/spec/clustering/cluster_holding_spec.rb @@ -65,6 +65,22 @@ def new_submission(holding, date: Date.today) expect(cluster.holdings.first.date_received).to eq(h2.date_received) end + it "updates an existing holding when the current holding has empty string attributes and the new has holding nil attributes" do + [:n_enum=, :n_chron=, :condition=, :issn=].each do |setter| + h.public_send(setter, "") + h2.public_send(setter, nil) + end + + old_date = h.date_received + described_class.new(h).cluster + cluster = Cluster.first + expect(cluster.holdings.first.date_received).to eq(old_date) + described_class.new(h2).cluster + cluster = Cluster.first + expect(cluster.holdings.first.date_received).not_to eq(old_date) + expect(cluster.holdings.first.date_received).to eq(h2.date_received) + end + it "updates only one existing holding" do described_class.new(h).cluster Cluster.first.add_holdings(h.clone)