Skip to content

Commit

Permalink
Merge pull request #299 from hathitrust/DEV-1124
Browse files Browse the repository at this point in the history
DEV-1124: holdings are equal if fields are equal or both empty
  • Loading branch information
aelkiss authored May 8, 2024
2 parents ed3107a + cc206fa commit 013f4f8
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 9 deletions.
14 changes: 12 additions & 2 deletions lib/clusterable/holding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,12 @@ def self.new_from_scrubbed_file_line(line)
# @param other, another holding
def ==(other)
self.class == other.class &&
(fields.keys - EQUALITY_EXCLUDED_FIELDS).all? { |attr| public_send(attr) == other.public_send(attr) }
(fields.keys - EQUALITY_EXCLUDED_FIELDS).all? do |attr|
self_attr = public_send(attr)
other_attr = other.public_send(attr)

(self_attr == other_attr) or (self_attr.blank? and other_attr.blank?)
end
end

# Is true when all fields match except for _id
Expand All @@ -107,7 +112,12 @@ def batch_with?(other)
# Turn a holding into a hash key for quick lookup
# in e.g. cluster_holding.find_old_holdings.
def update_key
as_document.slice(*(fields.keys - EQUALITY_EXCLUDED_FIELDS)).hash
as_document
.slice(*(fields.keys - EQUALITY_EXCLUDED_FIELDS))
# fold blank strings & nil to same update key, as in
# equality above
.transform_values { |f| f.blank? ? nil : f }
.hash
end

private
Expand Down
43 changes: 36 additions & 7 deletions spec/clusterable/holding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
expect(c.holdings.first._parent).to be(c)
end

it "normalizees enum_chron" do
it "normalizes enum_chron" do
holding = build(:holding, enum_chron: "v.1 Jul 1999")
expect(holding.n_enum).to eq("1")
expect(holding.n_chron).to eq("Jul 1999")
Expand All @@ -36,17 +36,46 @@
it "== is true if all fields match except date_received and uuid" do
h2.date_received = Date.yesterday
h2.uuid = SecureRandom.uuid
expect(h == h2).to be(true)
expect(h).to eq(h2)
end

it "== is true if all fields match including date_received" do
expect(h == h2).to be(true)
expect(h).to eq(h2)
end

context "when one holding has nil attrs and the other has empty string" do
before(:each) do
[:n_enum=, :n_chron=, :condition=, :issn=].each do |setter|
h.public_send(setter, "")
h2.public_send(setter, nil)
end
end

it "== is true" do
expect(h).to eq(h2)
end

it "update_key is the same" do
expect(h.update_key).to eq(h2.update_key)
end
end

(described_class.fields.keys - ["date_received", "uuid", "_id"]).each do |attr|
it "== is false if #{attr} doesn't match" do
h2[attr] = nil
expect(h == h2).to be(false)
# ensure attribute in h2 is different from h but
# of the same logical type

case h[attr]
when String
h2[attr] = "#{h[attr]}junk"
when Numeric
h2[attr] = h[attr] + 1
when true, false, nil
h2[attr] = !h[attr]
end

expect(h[attr]).not_to eq(h2[attr])
expect(h).not_to eq(h2)
end
end
end
Expand All @@ -55,12 +84,12 @@
let(:h2) { h.clone }

it "same_as is true if all fields match" do
expect(h.same_as?(h2)).to be(true)
expect(h).to be_same_as(h2)
end

it "same_as is not true if date_received does not match" do
h2.date_received = Date.yesterday
expect(h.same_as?(h2)).to be(false)
expect(h).not_to be_same_as(h2)
end
end

Expand Down
16 changes: 16 additions & 0 deletions spec/clustering/cluster_holding_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ def new_submission(holding, date: Date.today)
expect(cluster.holdings.first.date_received).to eq(h2.date_received)
end

it "updates an existing holding when the current holding has empty string attributes and the new has holding nil attributes" do
[:n_enum=, :n_chron=, :condition=, :issn=].each do |setter|
h.public_send(setter, "")
h2.public_send(setter, nil)
end

old_date = h.date_received
described_class.new(h).cluster
cluster = Cluster.first
expect(cluster.holdings.first.date_received).to eq(old_date)
described_class.new(h2).cluster
cluster = Cluster.first
expect(cluster.holdings.first.date_received).not_to eq(old_date)
expect(cluster.holdings.first.date_received).to eq(h2.date_received)
end

it "updates only one existing holding" do
described_class.new(h).cluster
Cluster.first.add_holdings(h.clone)
Expand Down

0 comments on commit 013f4f8

Please sign in to comment.