From a0e868f81bdf30ac823aafe1ec8309008c69f41b Mon Sep 17 00:00:00 2001 From: Alex Kiessling <32677188+ajkiessl@users.noreply.github.com> Date: Mon, 24 Oct 2022 17:23:05 -0400 Subject: [PATCH] Only Group Duplicates when new publication during AI import (#583) * Tweaks to ai importer to only group duplicates when a new publication is created and tweaked the duplicate grouping task to toggle publications to not be visible if they have not already been grouped. * Niftany --- app/importers/activity_insight_importer.rb | 11 +++--- app/models/duplicate_publication_group.rb | 5 +++ .../activity_insight_importer_spec.rb | 4 +-- .../duplicate_publication_group_spec.rb | 34 +++++++++++++++++++ 4 files changed, 46 insertions(+), 8 deletions(-) diff --git a/app/importers/activity_insight_importer.rb b/app/importers/activity_insight_importer.rb index 5c2ece156..4277db035 100644 --- a/app/importers/activity_insight_importer.rb +++ b/app/importers/activity_insight_importer.rb @@ -167,6 +167,11 @@ def call update_pub_record(pub_record, pub) else pi.save! + + DuplicatePublicationGroup.group_duplicates_of(pub_record) + if pub_record.reload.duplicate_group + pub_record.update!(visible: false) + end end if pub_record.updated_by_user_at.blank? @@ -195,12 +200,6 @@ def call c.save! end end - - DuplicatePublicationGroup.group_duplicates_of(pub_record) - - if pub_record.reload.duplicate_group - pub_record.update!(visible: false) - end end rescue StandardError => e log_error(pub, e, u) diff --git a/app/models/duplicate_publication_group.rb b/app/models/duplicate_publication_group.rb index 87fbfccfa..62195ad19 100644 --- a/app/models/duplicate_publication_group.rb +++ b/app/models/duplicate_publication_group.rb @@ -8,8 +8,13 @@ def self.group_duplicates total: Publication.count) Publication.find_each do |p| + already_grouped = p.duplicate_group.present? group_duplicates_of(p) + group = p.reload.duplicate_group + if !already_grouped && group + p.update!(visible: false) + end pbar.increment end pbar.finish diff --git a/spec/component/importers/activity_insight_importer_spec.rb b/spec/component/importers/activity_insight_importer_spec.rb index 8d222e9a8..46d532c3b 100644 --- a/spec/component/importers/activity_insight_importer_spec.rb +++ b/spec/component/importers/activity_insight_importer_spec.rb @@ -1179,7 +1179,7 @@ end it 'groups duplicates of new publication records' do - expect { importer.call }.to change(DuplicatePublicationGroup, :count).by 2 + expect { importer.call }.to change(DuplicatePublicationGroup, :count).by 1 p1 = PublicationImport.find_by(source: 'Activity Insight', source_identifier: '190706413568').publication @@ -1465,7 +1465,7 @@ end it 'groups duplicates of new publication records' do - expect { importer.call }.to change(DuplicatePublicationGroup, :count).by 2 + expect { importer.call }.to change(DuplicatePublicationGroup, :count).by 1 p1 = PublicationImport.find_by(source: 'Activity Insight', source_identifier: '190706413568').publication diff --git a/spec/component/models/duplicate_publication_group_spec.rb b/spec/component/models/duplicate_publication_group_spec.rb index a22ceaeaf..0d30b7018 100644 --- a/spec/component/models/duplicate_publication_group_spec.rb +++ b/spec/component/models/duplicate_publication_group_spec.rb @@ -400,6 +400,40 @@ expect(p15_1.reload.duplicate_group.publications).to match_array [p15_1, p15_2] expect(p16_1.reload.duplicate_group.publications).to match_array [p16_1, p16_2] end + + it "sets grouped publications' visible statuses to false when publication was not already grouped before process started" do + described_class.group_duplicates + + expect(p1_1.reload.duplicate_group.publications.map(&:visible)).to match_array [true, false, false, false] + expect(p2_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p3_1.reload.visible).to be true + expect(p3_2.reload.visible).to be true + expect(p4_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p5_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p6_1.reload.visible).to be true + expect(p6_2.reload.visible).to be true + expect(p7_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p8_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p9_1.reload.visible).to be true + expect(p9_2.reload.visible).to be true + expect(p10_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p11_1.reload.visible).to be true + expect(p11_2.reload.visible).to be true + expect(p11_3.reload.visible).to be true + expect(p12_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false] + expect(p12_2.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false] + expect(p12_3.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false] + expect(p13_1.reload.visible).to be true + expect(p13_2.reload.visible).to be true + expect(p14_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false, false] + expect(p14_2.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false, false] + expect(p14_3.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false, false] + expect(p14_4.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false, false, false] + expect(p15_1.reload.duplicate_group.publications.map(&:visible)).to match_array [false, false] + expect(p16_1.reload.duplicate_group.publications.map(&:visible)).to match_array [true, false] + expect(p17_1.reload.visible).to be true + expect(p17_2.reload.visible).to be true + end end end