Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ECO-277/find-upvs-urls-with-sparql #138

Merged
merged 3 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
class Upvs::FetchPublicAuthorityActiveEdesksListJob < ApplicationJob
queue_as :upvs

DATASET_URL = 'https://data.slovensko.sk/download?id=794af827-132b-46d1-98e7-ccd73eda26e0'

def perform(downloader: HarvesterUtils::Downloader)
csv_file = downloader.download_file(DATASET_URL)
def perform(dataset_url, downloader: HarvesterUtils::Downloader)
csv_file = downloader.download_file(dataset_url)
csv_options = { col_sep: File.open(csv_file) { |f| f.readline }.include?(';') ? ';' : ',', headers: true }

TemporaryPublicAuthorityActiveEdesk.transaction do
Expand Down
6 changes: 2 additions & 4 deletions app/jobs/upvs/fetch_public_authority_edesks_list_job.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
class Upvs::FetchPublicAuthorityEdesksListJob < ApplicationJob
queue_as :upvs

DATASET_URL = 'https://data.slovensko.sk/download?id=7c70f6c9-1777-4d8a-8711-f1dfd2359620'

def perform(downloader: HarvesterUtils::Downloader)
csv_file = downloader.download_file(DATASET_URL)
def perform(dataset_url, downloader: HarvesterUtils::Downloader)
csv_file = downloader.download_file(dataset_url)
csv_options = { col_sep: File.open(csv_file) { |f| f.readline }.include?(';') ? ';' : ',', headers: true }

TemporaryPublicAuthorityEdesk.transaction do
Expand Down
6 changes: 2 additions & 4 deletions app/jobs/upvs/fetch_services_with_forms_list_job.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
class Upvs::FetchServicesWithFormsListJob < ApplicationJob
queue_as :upvs

DATASET_URL = 'https://data.slovensko.sk/download?id=c78de203-caa5-4d1d-9496-975f0e2567d1'

def perform(downloader: HarvesterUtils::Downloader)
zip_file = downloader.download_file(DATASET_URL)
def perform(dataset_url, downloader: HarvesterUtils::Downloader)
zip_file = downloader.download_file(dataset_url)
csv_file = downloader.extract_csv(zip_file)

csv_options = {
Expand Down
40 changes: 40 additions & 0 deletions app/jobs/upvs/find_public_authority_active_edesks_list_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'faraday'

# Looks up the download URL of the latest "public authority active eDesks"
# dataset through the data.slovensko.sk SPARQL endpoint and passes the CSV
# distribution's URL to Upvs::FetchPublicAuthorityActiveEdesksListJob.
#
# Raises RuntimeError when the SPARQL request is not successful or when none
# of the returned URLs is served with a '.csv' Content-Disposition.
class Upvs::FindPublicAuthorityActiveEdesksListJob < ApplicationJob
  queue_as :upvs

  SET_URL = 'https://data.gov.sk/set/28c2c7ee-6a43-4746-9e3f-35e977f6f03d'
  BASE_URL = 'https://data.slovensko.sk/api/sparql'
  # Used as the LIMIT of the SPARQL query.
  # NOTE(review): presumably the number of formats (distributions) each
  # dataset is published in, so that only the newest dataset's rows are
  # returned -- confirm against the catalogue.
  NUMBER_OF_DATASET_FORMATS = 2

  # Queries the catalogue, picks the first candidate URL that is served as a
  # CSV file and runs the fetch job synchronously with it.
  def perform
    response = Faraday.get(BASE_URL, { query: download_urls_query })

    unless response.success?
      raise "Request to find latest dataset URL for set: #{SET_URL} failed with status code #{response.status}"
    end

    dataset_url = candidate_urls(response.body).find { |url| csv_download?(url) }

    raise "Dataset URL not found in response. Job: Upvs::FindPublicAuthorityActiveEdesksListJob" if dataset_url.nil?

    Upvs::FetchPublicAuthorityActiveEdesksListJob.perform_now(dataset_url)
  end

  private

  # SPARQL query selecting download URLs of the set's datasets, most recently
  # modified first, limited to NUMBER_OF_DATASET_FORMATS rows.
  def download_urls_query
    <<~SPARQL
      PREFIX dct: <http://purl.org/dc/terms/>
      PREFIX dcat: <http://www.w3.org/ns/dcat#>
      SELECT ?downloadURL
      WHERE {
        <#{SET_URL}> dct:hasPart ?dataset .
        ?dataset dct:issued ?issued .
        ?dataset dct:modified ?modified .
        ?dataset dcat:distribution ?distribution .
        ?distribution dcat:downloadURL ?downloadURL .
      }
      ORDER BY DESC(?modified)
      LIMIT #{NUMBER_OF_DATASET_FORMATS}
    SPARQL
  end

  # Splits the response body into stripped, non-blank lines and skips the
  # first one (presumably the "downloadURL" header row). `drop(1)` is used
  # instead of `[1..]` because the latter returns nil for an empty body,
  # which would surface as NoMethodError rather than the "not found" error.
  def candidate_urls(body)
    body.split("\n").drop(1).map(&:strip).reject(&:empty?)
  end

  # True when the URL's response advertises a '.csv' file name.
  # NOTE(review): this issues a GET per candidate just to read the headers;
  # a HEAD request might be enough if the server supports it -- confirm.
  def csv_download?(url)
    Faraday.get(url).headers['Content-Disposition']&.include?('.csv')
  end
end
40 changes: 40 additions & 0 deletions app/jobs/upvs/find_public_authority_edesks_list_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'faraday'

# Looks up the download URL of the latest "public authority eDesks" dataset
# through the data.slovensko.sk SPARQL endpoint and passes the CSV
# distribution's URL to Upvs::FetchPublicAuthorityEdesksListJob.
#
# Raises RuntimeError when the SPARQL request is not successful or when none
# of the returned URLs is served with a '.csv' Content-Disposition.
class Upvs::FindPublicAuthorityEdesksListJob < ApplicationJob
  queue_as :upvs

  SET_URL = 'https://data.gov.sk/set/8572f288-0186-4bc2-8d12-9eb324ff47bd'
  BASE_URL = 'https://data.slovensko.sk/api/sparql'
  # Used as the LIMIT of the SPARQL query.
  # NOTE(review): a reviewer asked whether this value needs to be specified
  # at all. It is presumably the number of formats (distributions) each
  # dataset is published in -- confirm against the catalogue.
  NUMBER_OF_DATASET_FORMATS = 1

  # Queries the catalogue, picks the first candidate URL that is served as a
  # CSV file and runs the fetch job synchronously with it.
  def perform
    response = Faraday.get(BASE_URL, { query: download_urls_query })

    unless response.success?
      raise "Request to find latest dataset URL for set: #{SET_URL} failed with status code #{response.status}"
    end

    dataset_url = candidate_urls(response.body).find { |url| csv_download?(url) }

    raise "Dataset URL not found in response. Job: Upvs::FindPublicAuthorityEdesksListJob" if dataset_url.nil?

    Upvs::FetchPublicAuthorityEdesksListJob.perform_now(dataset_url)
  end

  private

  # SPARQL query selecting download URLs of the set's datasets, most recently
  # modified first, limited to NUMBER_OF_DATASET_FORMATS rows.
  def download_urls_query
    <<~SPARQL
      PREFIX dct: <http://purl.org/dc/terms/>
      PREFIX dcat: <http://www.w3.org/ns/dcat#>
      SELECT ?downloadURL
      WHERE {
        <#{SET_URL}> dct:hasPart ?dataset .
        ?dataset dct:issued ?issued .
        ?dataset dct:modified ?modified .
        ?dataset dcat:distribution ?distribution .
        ?distribution dcat:downloadURL ?downloadURL .
      }
      ORDER BY DESC(?modified)
      LIMIT #{NUMBER_OF_DATASET_FORMATS}
    SPARQL
  end

  # Splits the response body into stripped, non-blank lines and skips the
  # first one (presumably the "downloadURL" header row). `drop(1)` is used
  # instead of `[1..]` because the latter returns nil for an empty body,
  # which would surface as NoMethodError rather than the "not found" error.
  def candidate_urls(body)
    body.split("\n").drop(1).map(&:strip).reject(&:empty?)
  end

  # True when the URL's response advertises a '.csv' file name.
  # NOTE(review): this issues a GET per candidate just to read the headers;
  # a HEAD request might be enough if the server supports it -- confirm.
  def csv_download?(url)
    Faraday.get(url).headers['Content-Disposition']&.include?('.csv')
  end
end
40 changes: 40 additions & 0 deletions app/jobs/upvs/find_services_with_forms_list_job.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
require 'faraday'

# Looks up the download URL of the latest "services with forms" dataset
# through the data.slovensko.sk SPARQL endpoint and passes the ZIP
# distribution's URL to Upvs::FetchServicesWithFormsListJob.
#
# Raises RuntimeError when the SPARQL request is not successful or when none
# of the returned URLs is served with a '.zip' Content-Disposition.
class Upvs::FindServicesWithFormsListJob < ApplicationJob
  queue_as :upvs

  SET_URL = 'https://data.gov.sk/set/9eeac271-ae1c-40f8-bb90-7089a9fcb659'
  BASE_URL = 'https://data.slovensko.sk/api/sparql'
  # Used as the LIMIT of the SPARQL query.
  # NOTE(review): presumably the number of formats (distributions) each
  # dataset is published in, so that only the newest dataset's rows are
  # returned -- confirm against the catalogue.
  NUMBER_OF_DATASET_FORMATS = 1

  # Queries the catalogue, picks the first candidate URL that is served as a
  # ZIP archive and runs the fetch job synchronously with it.
  def perform
    response = Faraday.get(BASE_URL, { query: download_urls_query })

    unless response.success?
      raise "Request to find latest dataset URL for set: #{SET_URL} failed with status code #{response.status}"
    end

    dataset_url = candidate_urls(response.body).find { |url| zip_download?(url) }

    raise "Dataset URL not found in response. Job: Upvs::FindServicesWithFormsListJob" if dataset_url.nil?

    Upvs::FetchServicesWithFormsListJob.perform_now(dataset_url)
  end

  private

  # SPARQL query selecting download URLs of the set's datasets, most recently
  # modified first, limited to NUMBER_OF_DATASET_FORMATS rows.
  def download_urls_query
    <<~SPARQL
      PREFIX dct: <http://purl.org/dc/terms/>
      PREFIX dcat: <http://www.w3.org/ns/dcat#>
      SELECT ?downloadURL
      WHERE {
        <#{SET_URL}> dct:hasPart ?dataset .
        ?dataset dct:issued ?issued .
        ?dataset dct:modified ?modified .
        ?dataset dcat:distribution ?distribution .
        ?distribution dcat:downloadURL ?downloadURL .
      }
      ORDER BY DESC(?modified)
      LIMIT #{NUMBER_OF_DATASET_FORMATS}
    SPARQL
  end

  # Splits the response body into stripped, non-blank lines and skips the
  # first one (presumably the "downloadURL" header row). `drop(1)` is used
  # instead of `[1..]` because the latter returns nil for an empty body,
  # which would surface as NoMethodError rather than the "not found" error.
  def candidate_urls(body)
    body.split("\n").drop(1).map(&:strip).reject(&:empty?)
  end

  # True when the URL's response advertises a '.zip' file name.
  # NOTE(review): this issues a GET per candidate just to read the headers;
  # a HEAD request might be enough if the server supports it -- confirm.
  def zip_download?(url)
    Faraday.get(url).headers['Content-Disposition']&.include?('.zip')
  end
end
6 changes: 3 additions & 3 deletions lib/tasks/upvs.rake
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
# Rake entry points for synchronising UPVS datasets. Each task enqueues the
# corresponding Find* job, which resolves the current dataset URL and then
# runs the matching Fetch* job with it. The Fetch* jobs are no longer
# enqueued directly because their `perform` now requires a dataset URL.
namespace :upvs do
  desc 'Sync all public authority eDesks'
  task 'public_authority_edesks:sync' => :environment do
    Upvs::FindPublicAuthorityEdesksListJob.perform_later
  end

  desc 'Sync public authority eDesks'
  task 'public_authority_active_edesks:sync' => :environment do
    Upvs::FindPublicAuthorityActiveEdesksListJob.perform_later
  end

  desc 'Sync services with forms list'
  task 'services_with_forms:sync' => :environment do
    Upvs::FindServicesWithFormsListJob.perform_later
  end
end
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
it 'downloads and imports public authority eDesks in V1 format' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1.csv'))

subject.perform(downloader: downloader)
subject.perform(url ,downloader: downloader)

expect(Upvs::PublicAuthorityActiveEdesk.first).to have_attributes(
uri: 'ico://sk/00332674',
Expand All @@ -27,7 +27,7 @@
expect(Upvs::PublicAuthorityActiveEdesk.count).to eq(10)
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1.csv'))

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::PublicAuthorityActiveEdesk.count).to eq(7)
end
Expand All @@ -37,7 +37,7 @@
it 'does not import public authority eDesks' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error(RuntimeError)
expect { subject.perform(url, downloader: downloader) }.to raise_error(RuntimeError)

expect(Upvs::PublicAuthorityActiveEdesk.count).to eq(0)
end
Expand All @@ -48,22 +48,22 @@
expect(Upvs::PublicAuthorityActiveEdesk.count).to eq(10)
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error(RuntimeError)
expect { subject.perform(url, downloader: downloader) }.to raise_error(RuntimeError)

expect(Upvs::PublicAuthorityActiveEdesk.count).to eq(10)
end

it 'raises custom error' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error('ico://sk/99166260 does not match 166260')
expect { subject.perform(url, downloader: downloader) }.to raise_error('ico://sk/99166260 does not match 166260')
end

it 'does not raise custom error if only leading zeros difference' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1-missing-leading-zeros.csv'))

# subject.perform(downloader: downloader)
expect { subject.perform(downloader: downloader) }.not_to raise_error
expect { subject.perform(url, downloader: downloader) }.not_to raise_error

expect(Upvs::PublicAuthorityActiveEdesk.last).to have_attributes(
uri: 'ico://sk/214973_10001',
Expand All @@ -77,7 +77,7 @@
it 'raises custom error' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/active-edesks-v1-incorrect-encoding.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error('Incorrect encoding')
expect { subject.perform(url, downloader: downloader) }.to raise_error('Incorrect encoding')
end
end
end
Expand Down
14 changes: 7 additions & 7 deletions spec/jobs/upvs/fetch_public_authority_edesks_list_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
it 'downloads and imports all public authority eDesks' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks.csv'))

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::PublicAuthorityEdesk.first).to have_attributes(
uri: 'ico://sk/00332674',
Expand All @@ -27,7 +27,7 @@
expect(Upvs::PublicAuthorityEdesk.count).to eq(10)
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks.csv'))

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::PublicAuthorityEdesk.count).to eq(7)
end
Expand All @@ -37,7 +37,7 @@
it 'does not import public authority eDesks' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error(RuntimeError)
expect { subject.perform(url, downloader: downloader) }.to raise_error(RuntimeError)

expect(Upvs::PublicAuthorityEdesk.count).to eq(0)
end
Expand All @@ -48,21 +48,21 @@
expect(Upvs::PublicAuthorityEdesk.count).to eq(10)
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error(RuntimeError)
expect { subject.perform(url, downloader: downloader) }.to raise_error(RuntimeError)

expect(Upvs::PublicAuthorityEdesk.count).to eq(10)
end

it 'raises custom error' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks-not-matching.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error('ico://sk/99166260 does not match 166260')
expect { subject.perform(url, downloader: downloader) }.to raise_error('ico://sk/99166260 does not match 166260')
end

it 'does not raise custom error if only leading zeros difference' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks-missing-leading-zeros.csv'))

expect { subject.perform(downloader: downloader) }.not_to raise_error
expect { subject.perform(url, downloader: downloader) }.not_to raise_error

expect(Upvs::PublicAuthorityEdesk.last).to have_attributes(
uri: 'ico://sk/214973_10001',
Expand All @@ -76,7 +76,7 @@
it 'raises custom error' do
expect(downloader).to receive(:download_file).with(url).and_return(fixture_filepath('upvs/edesks-incorrect-encoding.csv'))

expect { subject.perform(downloader: downloader) }.to raise_error('Incorrect encoding')
expect { subject.perform(url, downloader: downloader) }.to raise_error('Incorrect encoding')
end
end
end
Expand Down
10 changes: 5 additions & 5 deletions spec/jobs/upvs/fetch_services_with_forms_list_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
HarvesterUtils::Downloader.extract_csv(zip_file)
end

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::ServiceWithForm.first).to have_attributes(
instance_id: 2083,
Expand All @@ -36,7 +36,7 @@
HarvesterUtils::Downloader.extract_csv(zip_file)
end

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::ServiceWithForm.first).to have_attributes(
instance_id: 2082,
Expand All @@ -60,7 +60,7 @@
HarvesterUtils::Downloader.extract_csv(zip_file)
end

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::ServiceWithForm.first).to have_attributes(
instance_id: 2088,
Expand All @@ -85,7 +85,7 @@
HarvesterUtils::Downloader.extract_csv(zip_file)
end

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::ServiceWithForm.first).to have_attributes(
instance_id: 29644,
Expand Down Expand Up @@ -114,7 +114,7 @@
HarvesterUtils::Downloader.extract_csv(zip_file)
end

subject.perform(downloader: downloader)
subject.perform(url, downloader: downloader)

expect(Upvs::ServiceWithForm.count).to eq(20)
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
require 'rails_helper'

# Specs for the job that resolves the latest active-eDesks dataset URL via
# SPARQL and delegates the import to FetchPublicAuthorityActiveEdesksListJob.
RSpec.describe Upvs::FindPublicAuthorityActiveEdesksListJob, type: :job do
  include ActiveJob::TestHelper

  let(:sparql_url) { "https://data.slovensko.sk/api/sparql" }
  let(:csv_url) { 'https://data.slovensko.sk/download?id=794af827-132b-46d1-98e7-ccd73eda26e0' }
  let(:xlsx_url) { 'https://data.slovensko.sk/download?id=2350ef8f-9b8f-43c0-b075-dda38dd052d1' }

  after do
    clear_enqueued_jobs
  end

  describe "#perform" do
    before do
      # Default happy path: the endpoint lists both distributions and each
      # download URL answers with a Content-Disposition naming its format.
      stub_request(:get, sparql_url)
        .with(query: hash_including({"query": kind_of(String)}))
        .to_return(status: 200, body: "downloadURL\n#{csv_url}\n#{xlsx_url}\n")

      stub_request(:get, csv_url).to_return(headers: {'Content-Disposition': 'filename.csv'})
      stub_request(:get, xlsx_url).to_return(headers: {'Content-Disposition': 'filename.xlsx'})
    end

    it 'calls FetchPublicAuthorityActiveEdesksListJob with correct dataset url' do
      expect(Upvs::FetchPublicAuthorityActiveEdesksListJob).to receive(:perform_now).with(csv_url)
      subject.perform
    end

    it 'raises an error when the request is unsuccessful' do
      # Re-stubbing the same request overrides the stub from `before`.
      stub_request(:get, sparql_url)
        .with(query: hash_including({"query": kind_of(String)}))
        .to_return(status: 500, body: "Internal Server Error\n")

      expect {
        subject.perform
      }.to raise_error(/Request to find latest dataset URL for set:/)
    end

    it 'raises an error when no CSV distribution is listed' do
      # Only the XLSX distribution is returned, so no candidate matches '.csv'.
      stub_request(:get, sparql_url)
        .with(query: hash_including({"query": kind_of(String)}))
        .to_return(status: 200, body: "downloadURL\n#{xlsx_url}\n")

      expect(Upvs::FetchPublicAuthorityActiveEdesksListJob).not_to receive(:perform_now)

      expect {
        subject.perform
      }.to raise_error(/Dataset URL not found in response/)
    end
  end
end
Loading
Loading