diff --git a/Gemfile b/Gemfile index 07cbd9b2..dc1ff5bc 100644 --- a/Gemfile +++ b/Gemfile @@ -83,3 +83,5 @@ gem "sidekiq", "~> 6.5" gem "sidekiq-cron", "~> 1.10" gem "mail-notify", "~> 1.1" + +gem "rubyXL", "~> 3.4" diff --git a/Gemfile.lock b/Gemfile.lock index fc87f53e..8b0c87bf 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -395,6 +395,10 @@ GEM rubocop-factory_bot (~> 2.22) ruby-progressbar (1.13.0) ruby2_keywords (0.0.5) + rubyXL (3.4.25) + nokogiri (>= 1.10.8) + rubyzip (>= 1.3.0) + rubyzip (2.3.2) sanitize (6.1.0) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -506,6 +510,7 @@ DEPENDENCIES rubocop-performance rubocop-rails rubocop-rspec + rubyXL (~> 3.4) scenic sentry-rails (~> 5.11) shoulda-matchers (~> 5.0) diff --git a/app/models/reports/home_office.rb b/app/models/reports/home_office.rb index 291eb5ea..7b22ce82 100644 --- a/app/models/reports/home_office.rb +++ b/app/models/reports/home_office.rb @@ -2,41 +2,66 @@ module Reports class HomeOffice < Base - def csv - CSV.generate do |csv| - csv << header - rows.each { |row| csv << row } - end + file_ext "xlsx" + + HEADER_MAPPINGS_KEY = "header_mappings" + WORKSHEET_NAME_KEY = "worksheet_name" + + def generate + cell_coords.each { worksheet.add_cell(*_1) } + workbook.stream.string end def post_generation_hook - applications.update_all(home_office_csv_downloaded_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations + base_query.update_all(home_office_csv_downloaded_at: Time.zone.now) # rubocop:disable Rails/SkipsModelValidations end private - def rows - applications.map do |application| - [ - application.urn, - application.applicant.full_name, - application.applicant.date_of_birth, - nil, - application.applicant.nationality, - nil, - application.applicant.passport_number, - nil, - nil, - nil, - nil, - nil, - nil, - ] + def workbook + @workbook ||= ::RubyXL::Parser.parse_buffer(template.file) + end + + def template + @template ||= ReportTemplate.find_by!(report_class: self.class.name) + end + + def worksheet + @worksheet ||= workbook[worksheet_name] + end + + def worksheet_name + template.config.fetch(WORKSHEET_NAME_KEY) + end + + def header_mappings + template.config.fetch(HEADER_MAPPINGS_KEY) + end + + def headers_with_column_index + @headers_with_column_index ||= worksheet[0] + .cells + .each + .with_index + .map { |v, i| [v.value, i] } + end + + def sheet_col_number(header_mapping) + _, col_number = headers_with_column_index.detect { |(header, _)| header == header_mapping } + + col_number + end + + def cell_coords + dataset.each.with_index.flat_map do |cols, sheet_row_number| + header_mappings.each.with_index.map do |(header_mapping, _), col_idx| + [sheet_row_number + 1, sheet_col_number(header_mapping), cols[col_idx].to_s] + end end end - def applications - @applications ||= Application + def base_query + @base_query ||= Application .joins(:application_progress) .includes(:applicant) .where.not(application_progresses: { initial_checks_completed_at: nil }) @@ -49,22 +74,18 @@ def applications ) end - def header - [ - "ID", - "Full Name", - "DOB", - "Gender", - "Nationality", - "Place of Birth", - "Passport Number", - "National Insurance Number", - "Address", - "Postcode", - "Email", - "Telephone", - "Reference", - ] + def dataset + base_query.pluck(*dataset_fields) + end + + def dataset_fields + header_mappings.values.map do |cols| + if cols.size == 1 + cols.first + else + Arel.sql("CONCAT(#{cols.join(', \' \', ')})") + end + end end end end diff --git a/app/validators/home_office_report_config_validator.rb b/app/validators/home_office_report_config_validator.rb new file mode 100644 index 00000000..07646c51 --- /dev/null +++ b/app/validators/home_office_report_config_validator.rb @@ -0,0 +1,52 @@ +# Example config +# config: +# { +# worksheet_name: "Data", +# header_mapping: { +# "ID (Mandatory)" => %w[urn], +# "Full Name/ Organisation Name" => %w[applicants.given_name applicants.middle_name applicants.family_name], +# "DOB" => %w[applicants.date_of_birth], +# "Nationality" => %w[applicants.nationality], +# "Passport Number" => %w[applicants.passport_number], +# } +# } +# + +class HomeOfficeReportConfigValidator + def initialize(record) + @record = record + end + + def validate + return if record.report_class != Reports::HomeOffice.name + + validate_workbook + validate_config_worksheet_name + validate_worksheet + validate_config_header_mappings + end + +private + + attr_reader :record, :workbook + + def validate_workbook + @workbook = ::RubyXL::Parser.parse_buffer(record.file.dup) + rescue StandardError + record.errors.add(:file, :ho_invalid) + end + + def validate_worksheet + return if workbook.blank? + + record.errors.add(:config, :ho_invalid_worksheet_name) if workbook[record.config.fetch(Reports::HomeOffice::WORKSHEET_NAME_KEY, nil)].blank? + end + + def validate_config_worksheet_name + record.errors.add(:config, :ho_missing_worksheet_name) if record.config.fetch(Reports::HomeOffice::WORKSHEET_NAME_KEY, nil).blank? + end + + def validate_config_header_mappings + record.errors.add(:config, :ho_missing_header_mappings) if record.config.fetch(Reports::HomeOffice::HEADER_MAPPINGS_KEY, nil).blank? + end +end diff --git a/config/brakeman.ignore b/config/brakeman.ignore index b705f888..c5e8628b 100644 --- a/config/brakeman.ignore +++ b/config/brakeman.ignore @@ -183,8 +183,31 @@ 89 ], "note": "This is a false positive because the field argument in the method is provided by the Step class required_fields." + }, + { + "warning_type": "SQL Injection", + "warning_code": 0, + "fingerprint": "8df93197e95285f7b6b35ce2d819c93bcd71204a260dbd1b84e59a4962ec5e43", + "check_name": "SQL", + "message": "Possible SQL injection", + "file": "app/models/reports/home_office.rb", + "line": 86, + "link": "https://brakemanscanner.org/docs/warning_types/sql_injection/", + "code": "Arel.sql(\"CONCAT(#{cols.join(\", ' ', \")})\")", + "render_path": null, + "location": { + "type": "method", + "class": "Reports::HomeOffice", + "method": "dataset_fields" + }, + "user_input": "cols.join(\", ' ', \")", + "confidence": "Weak", + "cwe_id": [ + 89 + ], + "note": "" } ], - "updated": "2023-10-02 14:11:36 +0100", + "updated": "2023-10-05 14:55:00 +0100", "brakeman_version": "6.0.1" } diff --git a/config/locales/en.yml b/config/locales/en.yml index 9911cdf7..6560f938 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -55,8 +55,8 @@ en: report_template: attributes: file: - invalid: "File parsing error" + ho_invalid: "File parsing error" config: - missing_header_mappings: "config.header_mappings must be present" - missing_worksheet_name: "config.worksheet_name must be present" - invalid_worksheet_name: "config.worksheet_name not present in file" + ho_missing_header_mappings: "config.header_mappings must be present" + ho_missing_worksheet_name: "config.worksheet_name must be present" + ho_invalid_worksheet_name: "config.worksheet_name not present in file" diff --git a/spec/factories/report_templates.rb b/spec/factories/report_templates.rb index 897eb4a8..470014cc 100644 --- a/spec/factories/report_templates.rb +++ b/spec/factories/report_templates.rb @@ -27,7 +27,7 @@ report_class { "Reports::HomeOffice" } config do { - "worksheet_name" => "Data", + "worksheet_name" => "TestData", "header_mappings" => { "Column A" => %w[urn], "bar" => %w[applicants.given_name applicants.family_name], @@ -35,5 +35,23 @@ } end end + + factory :mocked_home_office_report_template do + file { Rails.root.join("spec/fixtures/test_homeoffice_template.xlsx").read } + filename { "test_homeoffice_template.xlsx" } + report_class { "Reports::HomeOffice" } + config do + { + "worksheet_name" => "Data", + "header_mappings" => { + "ID (Mandatory)" => %w[urn], + "Full Name/ Organisation Name" => %w[applicants.given_name applicants.middle_name applicants.family_name], + "DOB" => %w[applicants.date_of_birth], + "Nationality" => %w[applicants.nationality], + "Passport Number" => %w[applicants.passport_number], + }, + } + end + end end end diff --git a/spec/features/admin_console/reports_spec.rb b/spec/features/admin_console/reports_spec.rb index 55a2dea5..8156272e 100644 --- a/spec/features/admin_console/reports_spec.rb +++ b/spec/features/admin_console/reports_spec.rb @@ -54,7 +54,7 @@ def then_the_standing_data_csv_report_is_downloaded end def then_the_home_office_csv_report_is_downloaded - expect(page.response_headers["Content-Type"]).to match(/application\/vnd.openxmlformats-officedocument.spreadsheetml.sheet/) + expect(page.response_headers["Content-Type"]).to match(/application\/octet-stream/) expect(page.response_headers["Content-Disposition"]).to include "attachment" expect(page.response_headers["Content-Disposition"]).to match(/filename="reports-home-office.*/) end @@ -78,6 +78,7 @@ def then_the_qa_report_csv_report_is_downloaded end def and_i_click_on_the_home_office_csv_link + create(:mocked_home_office_report_template) within ".home-office" do click_on "Download" end diff --git a/spec/fixtures/test_homeoffice_template.xlsx b/spec/fixtures/test_homeoffice_template.xlsx index c99d75b4..6d8b2fd5 100644 Binary files a/spec/fixtures/test_homeoffice_template.xlsx and b/spec/fixtures/test_homeoffice_template.xlsx differ diff --git a/spec/models/reports/home_office_spec.rb b/spec/models/reports/home_office_spec.rb index 673b275b..ffa5f560 100644 --- a/spec/models/reports/home_office_spec.rb +++ b/spec/models/reports/home_office_spec.rb @@ -6,12 +6,16 @@ module Reports describe HomeOffice do include ActiveSupport::Testing::TimeHelpers + before { create(:mocked_home_office_report_template) } + subject(:report) { described_class.new } + let(:headers) { report.send(:worksheet)[0].cells.map(&:value) } + it "returns the filename of the Report" do frozen_time = Time.zone.local(2023, 7, 17, 12, 30, 45) travel_to frozen_time do - expected_name = "reports-home-office-20230717-123045.csv" + expected_name = "reports-home-office-20230717-123045.xlsx" report = described_class.new actual_name = report.filename @@ -20,11 +24,23 @@ module Reports end end - describe "#csv" do + describe "cell_coords" do + let(:cell_coords) { report.send(:cell_coords) } + let(:app) { create(:application, application_progress: build(:application_progress, :home_office_pending)) } + + before { app } + + it "formats date field" do + expect(cell_coords.first).to include(app.applicant.date_of_birth.to_s) + end + end + + describe "#dataset" do + let(:dataset) { report.send(:dataset) } + it "returns applicants who have completed initial checks but not home office checks" do app = create(:application, application_progress: build(:application_progress, :home_office_pending)) - - expect(report.csv).to include(app.urn) + expect(dataset.first).to include(app.urn) end it "does not return rejected applicants" do @@ -33,73 +49,58 @@ module Reports rejection_completed_at: Time.zone.now) app = create(:application, application_progress: progress) - expect(report.csv).not_to include(app.urn) + expect(dataset).not_to include(app.urn) end it "does not return applicants who have not completed initial checks" do app = create(:application, application_progress: build(:application_progress, initial_checks_completed_at: nil)) - - expect(report.csv).not_to include(app.urn) + expect(dataset).not_to include(app.urn) end it "does not return applicants who have completed home office checks" do app = create(:application, application_progress: build(:application_progress, :initial_checks_completed, home_office_checks_completed_at: Time.zone.now)) - expect(report.csv).not_to include(app.urn) + expect(dataset).not_to include(app.urn) end it "returns the data in CSV format" do application = create(:application, application_progress: build(:application_progress, :home_office_pending)) - expect(report.csv).to include([ - application.urn, - application.applicant.full_name, + expect(dataset).to contain_exactly([ application.applicant.date_of_birth, - nil, application.applicant.nationality, - nil, + application.urn, application.applicant.passport_number, - nil, - nil, - nil, - nil, - nil, - nil, - ].join(",")) + application.applicant.full_name, + ]) end it "returns the header in CSV format" do expected_header = [ - "ID", - "Full Name", "DOB", - "Gender", + "Dummy 1", + "Dummy 2", + "Full Name/ Organisation Name", + "ID (Mandatory)", "Nationality", - "Place of Birth", "Passport Number", - "National Insurance Number", - "Address", - "Postcode", - "Email", - "Telephone", - "Reference", - ].join(",") - - expect(report.csv).to include(expected_header) + ] + + expect(headers).to match_array(expected_header) end context "includes applications from the csv before invoking `post_generation_hook`" do let(:app) { create(:application, application_progress: build(:application_progress, :home_office_pending)) } - let(:csv) { report.csv } + let(:dataset) { report.send(:dataset) } before { app } - it { expect(csv).to include(app.urn) } + it { expect(dataset.first).to include(app.urn) } context "excludes applications from the csv after invoking `post_generation_hook`" do before { report.post_generation_hook } - it { expect(csv).not_to include(app.urn) } + it { expect(dataset.first).to be_nil } end end end diff --git a/spec/validators/home_office_report_config_validator_spec.rb b/spec/validators/home_office_report_config_validator_spec.rb new file mode 100644 index 00000000..dad124bd --- /dev/null +++ b/spec/validators/home_office_report_config_validator_spec.rb @@ -0,0 +1,70 @@ +require "rails_helper" + +describe HomeOfficeReportConfigValidator do + subject(:validator) { described_class.new(report_template) } + + let(:report_template) { build(:home_office_report_template, config:) } + + before { validator.validate } + + context "when not a home office report_template skip validation" do + let(:report_template) { build(:report_template) } + + it { expect(report_template.errors[:file]).to be_blank } + it { expect(report_template.errors[:config]).to be_blank } + end + + context "returns error on file" do + let(:report_template) { build(:home_office_report_template, file: "bad_file") } + + it { expect(report_template.errors[:file]).not_to be_blank } + end + + context "returns error on config" do + context "when missing worksheet_name" do + let(:config) do + { + "header_mappings" => { + "Column A" => %w[urn], + }, + } + end + + it { expect(report_template.errors[:config]).not_to be_blank } + end + + context "when worksheet_name does not exist is file" do + let(:config) do + { + "worksheet_name" => "unknown", + "header_mappings" => { + "Column A" => %w[urn], + }, + } + end + + it { expect(report_template.errors[:config]).not_to be_blank } + end + + context "when missing header_mappgins" do + let(:config) do + { + "worksheet_name" => "Data", + } + end + + it { expect(report_template.errors[:config]).not_to be_blank } + end + + context "when header_mappgins blank" do + let(:config) do + { + "worksheet_name" => "Data", + "header_mappings" => {}, + } + end + + it { expect(report_template.errors[:config]).not_to be_blank } + end + end +end