From cdecd04e79ae4e279de6fc6c588cd9653d0214ab Mon Sep 17 00:00:00 2001 From: Alex Kiessling <32677188+ajkiessl@users.noreply.github.com> Date: Thu, 10 Nov 2022 11:00:04 -0500 Subject: [PATCH] Split OAB import (#613) * Split OAB import to have two new importers. One that runs the OAB import on only pubs that have DOIs, and one that runs the OAB import on only pubs that don't have DOIs. Tests and rake tasks. * Adds sleep to without_doi import * Update open_access_button_publication_importer.rb --- ...open_access_button_publication_importer.rb | 31 ++++++++++ lib/tasks/imports.rake | 10 ++++ ...access_button_publication_importer_spec.rb | 56 +++++++++++++++++++ 3 files changed, 97 insertions(+) diff --git a/app/importers/open_access_button_publication_importer.rb b/app/importers/open_access_button_publication_importer.rb index 936dbd740..943ee8a4e 100644 --- a/app/importers/open_access_button_publication_importer.rb +++ b/app/importers/open_access_button_publication_importer.rb @@ -23,6 +23,29 @@ def import_new pbar.finish end + def import_with_doi + pbar = ProgressBarTTY.create(title: 'Importing publication data from Open Access Button', + total: doi_pubs.count) + + doi_pubs.find_each do |p| + query_open_access_button_for(p) + pbar.increment + end + pbar.finish + end + + def import_without_doi + pbar = ProgressBarTTY.create(title: 'Importing publication data from Open Access Button', + total: no_doi_pubs.count) + + no_doi_pubs.find_each do |p| + query_open_access_button_for(p) + pbar.increment + sleep 1 unless Rails.env.test? + end + pbar.finish + end + private def all_pubs @@ -33,6 +56,14 @@ def new_pubs all_pubs.where(open_access_button_last_checked_at: nil) end + def doi_pubs + all_pubs.where("doi IS NOT NULL AND doi <> ''") + end + + def no_doi_pubs + all_pubs.where("doi IS NULL OR doi = ''") + end + def query_open_access_button_for(publication) oab_json = nil find_url = if publication.doi.present? 
diff --git a/lib/tasks/imports.rake b/lib/tasks/imports.rake index 15a2766e4..43c825a68 100644 --- a/lib/tasks/imports.rake +++ b/lib/tasks/imports.rake @@ -34,6 +34,16 @@ namespace :import do OpenAccessButtonPublicationImporter.new.import_new end + desc 'Import Open Access Button publication URLs for publications that have a DOI' + task with_doi_open_access_button: :environment do + OpenAccessButtonPublicationImporter.new.import_with_doi + end + + desc 'Import Open Access Button publication URLs for publications that do not have a DOI' + task without_doi_open_access_button: :environment do + OpenAccessButtonPublicationImporter.new.import_without_doi + end + desc 'Import Unpaywall publication metadata' task unpaywall: :environment do UnpaywallPublicationImporter.new.import_all diff --git a/spec/component/importers/open_access_button_publication_importer_spec.rb b/spec/component/importers/open_access_button_publication_importer_spec.rb index e0b1ec79b..ebed69d31 100644 --- a/spec/component/importers/open_access_button_publication_importer_spec.rb +++ b/spec/component/importers/open_access_button_publication_importer_spec.rb @@ -389,4 +389,60 @@ end end end + + describe '#import_with_doi' do + let!(:pub1) do + create :publication, + doi: 'https://doi.org/10.000/doi1', + title: 'Stable characteristic evolution of generic three-dimensional single-black-hole spacetimes' + end + let!(:pub2) do + create :publication, + doi: nil, + title: 'Publication 2' + end + + before do + allow(HTTParty).to receive(:get).with('https://api.openaccessbutton.org/find?id=10.000%2Fdoi1') + .and_return(File.read(Rails.root.join('spec', 'fixtures', 'oab3.json'))) + end + + it 'creates a new open access location for the publication' do + expect { importer.import_with_doi }.to change(OpenAccessLocation, :count).by 1 + expect(pub1.reload.open_access_locations.count).to eq 1 + end + + it 'updates Open Access Button check timestamp on the publication' do + importer.import_with_doi + 
expect(pub1.reload.open_access_button_last_checked_at).to be_within(1.minute).of(Time.zone.now) + end + end + + describe '#import_without_doi' do + let!(:pub1) do + create :publication, + doi: '', + title: 'Stable characteristic evolution of generic three-dimensional single-black-hole spacetimes' + end + let!(:pub2) do + create :publication, + doi: 'https://doi.org/10.000/doi1', + title: 'Publication 2' + end + + before do + allow(HTTParty).to receive(:get).with('https://api.openaccessbutton.org/find?title=Stable+characteristic+evolution+of+generic+three-dimensional+single-black-hole+spacetimes') + .and_return(File.read(Rails.root.join('spec', 'fixtures', 'oab3.json'))) + end + + it 'creates a new open access location for the publication' do + expect { importer.import_without_doi }.to change(OpenAccessLocation, :count).by 1 + expect(pub1.reload.open_access_locations.count).to eq 1 + end + + it 'updates Open Access Button check timestamp on the publication' do + importer.import_without_doi + expect(pub1.reload.open_access_button_last_checked_at).to be_within(1.minute).of(Time.zone.now) + end + end end