Skip to content

Commit

Permalink
Split OAB import (#613)
Browse files Browse the repository at this point in the history
* Split the OAB import into two new importers: one that runs the OAB import only on pubs that have DOIs, and one that runs the OAB import only on pubs that don't have DOIs. Adds tests and rake tasks.

* Adds sleep to without_doi import

* Update open_access_button_publication_importer.rb
  • Loading branch information
ajkiessl authored Nov 10, 2022
1 parent c2eab1a commit cdecd04
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 0 deletions.
31 changes: 31 additions & 0 deletions app/importers/open_access_button_publication_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,29 @@ def import_new
pbar.finish
end

# Runs the Open Access Button query for every publication returned by
# doi_pubs (publications with a non-blank DOI), advancing a progress bar
# after each publication and finishing the bar once the batch is done.
def import_with_doi
  publications = doi_pubs
  progress = ProgressBarTTY.create(
    title: 'Importing publication data from Open Access Button',
    total: publications.count
  )

  publications.find_each do |publication|
    query_open_access_button_for(publication)
    progress.increment
  end

  progress.finish
end

# Runs the Open Access Button query for every publication returned by
# no_doi_pubs (publications whose DOI is NULL or blank), advancing a progress
# bar after each one. Outside the test environment it pauses one second after
# each publication before moving on to the next.
def import_without_doi
  publications = no_doi_pubs
  progress = ProgressBarTTY.create(
    title: 'Importing publication data from Open Access Button',
    total: publications.count
  )

  publications.find_each do |publication|
    query_open_access_button_for(publication)
    progress.increment

    # The pause is skipped under test so the spec suite stays fast.
    next if Rails.env.test?

    sleep 1
  end

  progress.finish
end

private

def all_pubs
Expand All @@ -33,6 +56,14 @@ def new_pubs
all_pubs.where(open_access_button_last_checked_at: nil)
end

# Narrows all_pubs to the records whose doi column is neither NULL nor an
# empty string.
def doi_pubs
  has_doi_sql = "doi IS NOT NULL AND doi <> ''"
  all_pubs.where(has_doi_sql)
end

# Narrows all_pubs to the records whose doi column is NULL or an empty string.
def no_doi_pubs
  missing_doi_sql = "doi IS NULL OR doi = ''"
  all_pubs.where(missing_doi_sql)
end

def query_open_access_button_for(publication)
oab_json = nil
find_url = if publication.doi.present?
Expand Down
10 changes: 10 additions & 0 deletions lib/tasks/imports.rake
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ namespace :import do
OpenAccessButtonPublicationImporter.new.import_new
end

# Runs the Open Access Button importer's DOI-only pass (#import_with_doi).
# Depends on the :environment task.
desc 'Import Open Access Button publication URLs for publications that have a DOI'
task with_doi_open_access_button: :environment do
  importer = OpenAccessButtonPublicationImporter.new
  importer.import_with_doi
end

# Runs the Open Access Button importer's pass over publications that lack a
# DOI (#import_without_doi). Depends on the :environment task.
desc 'Import Open Access Button publication URLs for publications that do not have a DOI'
task without_doi_open_access_button: :environment do
  importer = OpenAccessButtonPublicationImporter.new
  importer.import_without_doi
end

desc 'Import Unpaywall publication metadata'
task unpaywall: :environment do
UnpaywallPublicationImporter.new.import_all
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,4 +389,60 @@
end
end
end

# Exercises #import_with_doi against one publication that has a DOI (pub1) and
# one whose DOI is nil (pub2).
describe '#import_with_doi' do
  let!(:pub1) do
    create(:publication,
           doi: 'https://doi.org/10.000/doi1',
           title: 'Stable characteristic evolution of generic three-dimensional single-black-hole spacetimes')
  end
  let!(:pub2) do
    create(:publication, doi: nil, title: 'Publication 2')
  end

  # Only the DOI-based lookup URL for pub1 is stubbed; the canned response body
  # is read from the oab3.json fixture.
  before do
    oab_response = File.read(Rails.root.join('spec', 'fixtures', 'oab3.json'))
    allow(HTTParty).to receive(:get)
      .with('https://api.openaccessbutton.org/find?id=10.000%2Fdoi1')
      .and_return(oab_response)
  end

  it 'creates a new open access location for the publication' do
    expect { importer.import_with_doi }.to change(OpenAccessLocation, :count).by(1)
    expect(pub1.reload.open_access_locations.count).to eq(1)
  end

  it 'updates Open Access Button check timestamp on the publication' do
    importer.import_with_doi
    checked_at = pub1.reload.open_access_button_last_checked_at
    expect(checked_at).to be_within(1.minute).of(Time.zone.now)
  end
end

# Exercises #import_without_doi against one publication with a blank DOI
# (pub1) and one that has a DOI (pub2).
describe '#import_without_doi' do
  let!(:pub1) do
    create(:publication,
           doi: '',
           title: 'Stable characteristic evolution of generic three-dimensional single-black-hole spacetimes')
  end
  let!(:pub2) do
    create(:publication, doi: 'https://doi.org/10.000/doi1', title: 'Publication 2')
  end

  # Only the title-based lookup URL for pub1 is stubbed; the canned response
  # body is read from the oab3.json fixture.
  before do
    oab_response = File.read(Rails.root.join('spec', 'fixtures', 'oab3.json'))
    allow(HTTParty).to receive(:get)
      .with('https://api.openaccessbutton.org/find?title=Stable+characteristic+evolution+of+generic+three-dimensional+single-black-hole+spacetimes')
      .and_return(oab_response)
  end

  it 'creates a new open access location for the publication' do
    expect { importer.import_without_doi }.to change(OpenAccessLocation, :count).by(1)
    expect(pub1.reload.open_access_locations.count).to eq(1)
  end

  it 'updates Open Access Button check timestamp on the publication' do
    importer.import_without_doi
    checked_at = pub1.reload.open_access_button_last_checked_at
    expect(checked_at).to be_within(1.minute).of(Time.zone.now)
  end
end
end

0 comments on commit cdecd04

Please sign in to comment.