Skip to content

Commit

Permalink
Unpaywall import changes (#574)
Browse files Browse the repository at this point in the history
* Updating unpaywall_last_checked_at whenever a publication is checked regardless of doi or title match (since we are now checking all publications no matter what). Requiring title match for oa status update.

* niftany
  • Loading branch information
ajkiessl authored Oct 11, 2022
1 parent 4f67795 commit 1eff6a6
Show file tree
Hide file tree
Showing 6 changed files with 183 additions and 163 deletions.
14 changes: 7 additions & 7 deletions app/importers/unpaywall_publication_importer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ def update_publication(publication, unpaywall_json)
unpaywall_locations = unpaywall_json['oa_locations'].presence || []
existing_doi = true
else
unpaywall_title = unpaywall_json['results'].nil? ? '' : unpaywall_json['results'].first['response']['title']
unpaywall_result = unpaywall_json['results'].nil? ? '' : unpaywall_json['results'].first
unpaywall_title = unpaywall_result.present? ? unpaywall_result['response']['title'] : ''
unpaywall_locations = if title_match?(unpaywall_title, publication.title)
publication.doi = DOISanitizer.new(unpaywall_json['results'].first['response']['doi']).url
unpaywall_json['results'].first['response']['oa_locations'].presence || []
Expand Down Expand Up @@ -95,14 +96,13 @@ def update_publication(publication, unpaywall_json)

publication.open_access_status = if existing_doi
unpaywall_json['oa_status']
else
unpaywall_json['results'].nil? ? nil : unpaywall_json['results'].first['response']['oa_status']
elsif title_match?(unpaywall_title, publication.title)
unpaywall_result = unpaywall_json['results'].nil? ? nil : unpaywall_json['results'].first
unpaywall_result.present? ? unpaywall_result['response']['oa_status'] : nil
end
publication.unpaywall_last_checked_at = Time.zone.now

if publication.doi.present? || title_match?(unpaywall_title, publication.title)
publication.save!
end
publication.save!
end
end

Expand All @@ -111,7 +111,7 @@ def query_unpaywall_for(publication)
doi_url_path = Addressable::URI.encode(publication.doi_url_path)
find_url = "https://api.unpaywall.org/v2/#{doi_url_path}[email protected]"
else
find_url = "https://api.unpaywall.org/v2/search/?query=#{publication.title}&[email protected]"
find_url = "https://api.unpaywall.org/v2/search/?query=#{CGI.escape(publication.title)}&[email protected]"
end

JSON.parse(HttpService.get(find_url))
Expand Down
16 changes: 8 additions & 8 deletions spec/component/importers/unpaywall_publication_importer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
expect { importer.import_all }.not_to change(OpenAccessLocation, :count)
end

it "does not update the publication's Unpaywall check timestamp" do
it "updates the publication's Unpaywall check timestamp" do
importer.import_all
expect(pub.reload.unpaywall_last_checked_at).to be_nil
expect(pub.reload.unpaywall_last_checked_at).to be_within(1.minute).of(Time.zone.now)
end

it 'does not update the open access status on the publication' do
Expand Down Expand Up @@ -97,9 +97,9 @@
expect { importer.import_all }.not_to change(OpenAccessLocation, :count)
end

it "does not update the publication's Unpaywall check timestamp" do
it "updates the publication's Unpaywall check timestamp" do
importer.import_all
expect(pub.reload.unpaywall_last_checked_at).to be_nil
expect(pub.reload.unpaywall_last_checked_at).to be_within(1.minute).of(Time.zone.now)
end

it 'does not update the open access status on the publication' do
Expand Down Expand Up @@ -467,9 +467,9 @@
expect { importer.import_all }.not_to change(OpenAccessLocation, :count)
end

it "does not update the publication's Unpaywall check timestamp" do
it "updates the publication's Unpaywall check timestamp" do
importer.import_all
expect(pub.reload.unpaywall_last_checked_at).to be_nil
expect(pub.reload.unpaywall_last_checked_at).to be_within(1.minute).of(Time.zone.now)
end

it 'does not update the open access status on the publication' do
Expand Down Expand Up @@ -521,9 +521,9 @@
expect { importer.import_all }.not_to change(OpenAccessLocation, :count)
end

it "does not update the publication's Unpaywall check timestamp" do
it "updates the publication's Unpaywall check timestamp" do
importer.import_all
expect(pub.reload.unpaywall_last_checked_at).to be_nil
expect(pub.reload.unpaywall_last_checked_at).to be_within(1.minute).of(Time.zone.now)
end

it 'does not update the open access status on the publication' do
Expand Down
Loading

0 comments on commit 1eff6a6

Please sign in to comment.