From 9514c973b94b6201fabddd51cc6e84321e21ffac Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 16:38:05 -0400 Subject: [PATCH 01/38] move in generic version of runner --- .../s3/complete_dump_runner.rb | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb new file mode 100644 index 00000000000..7e27340167c --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +class CompleteDumpRunner + attr_reader :submission_ids, :parent_dir, :successes, :failures, + :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures + + def initialize(submission_ids:, + parent_dir: 'wipn8923-test', + bundle_by_user: true, + run_quiet: true, + quiet_upload_failures: false, + quiet_pdf_failures: false, + signed_link: false) + @submission_ids = submission_ids + @parent_dir = parent_dir + @bundle_by_user = bundle_by_user + @run_quiet = run_quiet # silence but record errors until the end + @quiet_upload_failures = quiet_upload_failures # granular control over how user processing raises errors + @quiet_pdf_failures = quiet_pdf_failures # granular control over how user processing raises errors + @failures = [] + end + + def submissions + @submissions ||= Form526Submission.where(id: submission_ids) + end + + def submissions_by_uuid + @submissions_by_uuid ||= group_submissions_by_uuid + end + + # allows for deduplicating submissions per user on the next layer + def group_submissions_by_uuid + {}.tap do |collection| + submissions.pluck(:user_uuid).uniq.each do |uuid| + collection[uuid] = Form526Submission.where(user_uuid: uuid, id: submission_ids).pluck(:id) + end + end + end + + def run + if bundle_by_user + submissions_by_uuid.each do |uuid, submission_ids| + Rails.logger.info( + "Processing for user: #{uuid} with #{submission_ids&.count} submission(s)", + { uuid:, submission_ids: } + ) + UserSubmissionDumpBuilder.new(uuid:, submission_ids:, parent_dir:).run + rescue => e + raise unless run_quiet + + Rails.logger.error("User failure: #{uuid}", { uuid:, error: e }) + failures << { uuid:, error: e } + end + else + submissions.each_with_index do |sub, idx| + Rails.logger.info( + "Processing submission: #{sub.id} (non-grouped) # #{idx + 1} of #{submissions.count} total submissions", { + submission_id: sub.id, submission_count: submissions.count + } + ) + DumpSubmissionToPdf.new(submission_id: sub.id, parent_dir:, quiet_pdf_failures:, quiet_upload_failures:).run + rescue => e + raise unless run_quiet + + failures << { submission_id: id, error: e } + end + end + parent_dir + end + + def clear_tmp + system('rm -f tmp/* > /dev/null 2>&1') + end + + def s3_resource + @s3_resource ||= Reports::Uploader.new_s3_resource + end + + def target_bucket + @target_bucket ||= Reports::Uploader.s3_bucket + end +end From 72ec29ae05d00c290a11df20bc526ba892251022 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 16:45:00 -0400 Subject: [PATCH 02/38] code refinement --- .../s3/complete_dump_runner.rb | 90 ++++++++++++------- 1 file changed, 56 insertions(+), 34 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb index 7e27340167c..2ee8256554b 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb @@ -20,6 +20,14 @@ def initialize(submission_ids:, @failures = [] end + def run + bundle_by_user ? process_by_user : process_individual_submissions + cleanup_tmp_files + parent_dir + end + + private + def submissions @submissions ||= Form526Submission.where(id: submission_ids) end @@ -28,50 +36,64 @@ def submissions_by_uuid @submissions_by_uuid ||= group_submissions_by_uuid end - # allows for deduplicating submissions per user on the next layer + # Group submissions by user_uuid for easier bundling def group_submissions_by_uuid - {}.tap do |collection| - submissions.pluck(:user_uuid).uniq.each do |uuid| - collection[uuid] = Form526Submission.where(user_uuid: uuid, id: submission_ids).pluck(:id) - end + submissions.group_by(&:user_uuid).transform_values do |user_submissions| + user_submissions.map(&:id) end end - def run - if bundle_by_user - submissions_by_uuid.each do |uuid, submission_ids| - Rails.logger.info( - "Processing for user: #{uuid} with #{submission_ids&.count} submission(s)", - { uuid:, submission_ids: } - ) - UserSubmissionDumpBuilder.new(uuid:, submission_ids:, parent_dir:).run - rescue => e - raise unless run_quiet - - Rails.logger.error("User failure: #{uuid}", { uuid:, error: e }) - failures << { uuid:, error: e } - end - else - submissions.each_with_index do |sub, idx| - Rails.logger.info( - "Processing submission: #{sub.id} (non-grouped) # #{idx + 1} of #{submissions.count} total submissions", { - submission_id: sub.id, submission_count: submissions.count - } - ) - DumpSubmissionToPdf.new(submission_id: sub.id, parent_dir:, quiet_pdf_failures:, quiet_upload_failures:).run - rescue => e - raise unless run_quiet - - failures << { submission_id: id, error: e } - end + def process_by_user + submissions_by_uuid.each do |uuid, submission_ids| + log_info("Processing for user: #{uuid} with #{submission_ids.size} submission(s)", uuid:, submission_ids:) + process_user_submissions(uuid, submission_ids) end - parent_dir end - def clear_tmp + def process_individual_submissions + submissions.each_with_index do |sub, idx| + log_info("Processing submission: #{sub.id} (non-grouped) ##{idx + 1} of #{submissions.count} total submissions", + submission_id: sub.id, submission_count: submissions.count) + process_submission(sub.id) + end + end + + def process_user_submissions(uuid, submission_ids) + UserSubmissionDumpBuilder.new(uuid:, submission_ids:, parent_dir:).run + rescue => e + handle_error("User failure: #{uuid}", e, uuid:) + end + + def process_submission(submission_id) + DumpSubmissionToPdf.new( + submission_id:, + parent_dir:, + quiet_pdf_failures:, + quiet_upload_failures: + ).run + rescue => e + handle_error("Submission failure: #{submission_id}", e, submission_id:) + end + + def handle_error(message, error, context) + raise unless run_quiet + + log_error(message, error, context) + failures << { context => error } + end + + def cleanup_tmp_files system('rm -f tmp/* > /dev/null 2>&1') end + def log_info(message, **details) + Rails.logger.info(message, details) + end + + def log_error(message, error, **details) + Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) + end + def s3_resource @s3_resource ||= Reports::Uploader.new_s3_resource end From c0667860c2b038682fba8e18a12e83883629553f Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 17:10:53 -0400 Subject: [PATCH 03/38] fix long param list --- .../s3/complete_dump_runner.rb | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb index 2ee8256554b..0b30e79a98c 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb @@ -4,19 +4,15 @@ class CompleteDumpRunner attr_reader :submission_ids, :parent_dir, :successes, :failures, :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures - def initialize(submission_ids:, - parent_dir: 'wipn8923-test', - bundle_by_user: true, - run_quiet: true, - quiet_upload_failures: false, - quiet_pdf_failures: false, - signed_link: false) + def initialize(submission_ids:, **options) + defaults = default_options.merge(options) + @submission_ids = submission_ids - @parent_dir = parent_dir - @bundle_by_user = bundle_by_user - @run_quiet = run_quiet # silence but record errors until the end - @quiet_upload_failures = quiet_upload_failures # granular control over how user processing raises errors - @quiet_pdf_failures = quiet_pdf_failures # granular control over how user processing raises errors + @parent_dir = defaults[:parent_dir] + @bundle_by_user = defaults[:bundle_by_user] + @run_quiet = defaults[:run_quiet] + @quiet_upload_failures = defaults[:quiet_upload_failures] + @quiet_pdf_failures = defaults[:quiet_pdf_failures] @failures = [] end @@ -28,8 +24,18 @@ def run private + def default_options + { + parent_dir: 'wipn8923-test', + bundle_by_user: true, + run_quiet: true, # silence but record errors until the end + quiet_upload_failures: false, # granular control over how user processing raises errors + quiet_pdf_failures: false # granular control over how user processing raises errors + } + end + def submissions - @submissions ||= Form526Submission.where(id: submission_ids) + @submissions ||= FormSubmission.where(id: submission_ids) end def submissions_by_uuid From e6c56e2e35a1c66b1466616144e44f951768195e Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 17:15:31 -0400 Subject: [PATCH 04/38] rename class --- .../s3/{complete_dump_runner.rb => submission_dump_handler.rb} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename modules/simple_forms_api/app/services/simple_forms_api/s3/{complete_dump_runner.rb => submission_dump_handler.rb} (99%) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb similarity index 99% rename from modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb index 0b30e79a98c..04c5b460795 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/complete_dump_runner.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb @@ -1,6 +1,6 @@ # frozen_string_literal: true -class CompleteDumpRunner +class SubmissionDumpHandler attr_reader :submission_ids, :parent_dir, :successes, :failures, :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures From 4ef32b14674648419b5f87c1d1b2606d42a35d71 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 17:35:45 -0400 Subject: [PATCH 05/38] add existing version of DumpSubmissionToPdf class --- .../s3/dump_submission_to_pdf.rb | 241 ++++++++++++++++++ 1 file changed, 241 insertions(+) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb new file mode 100644 index 00000000000..e409210fc21 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb @@ -0,0 +1,241 @@ +# frozen_string_literal: true + +# To use +# ids = +# parent_dir = +# +# to see your dump in s3 +# 1. go here https://console.amazonaws-us-gov.com/s3/home?region=us-gov-west-1# +# 2. login with 2fa +# 3. search for dsva-vetsgov-prod-reports +# 4. search for your parent_dir name, e.g. 526dump_aug_21st_2024 +# +# If you do not provide a parent_dir, the script defaults to a folder called wipn8923-test +# +# OPTION 1: Run the script with user groupings +# - requires SubmissionDuplicateReport object +# - SubmissionDumpHandler.new(submission_ids: ids, parent_dir:).run +# +# OPTION 2: Run without user groupings +# ids.each { |id| DumpSubmissionToPdf.new(submission_id: id, parent_dir:).run } +# this will just put each submission in a folder by it's id under the parent dir +class DumpSubmissionToPdf + attr_accessor :submission, :parent_dir, :failed_uploads, :include_text_dump, + :quiet_upload_failures, :quiet_pdf_failures, :include_json_dump, :run_quiet + + def initialize(submission_id: nil, submission: nil, **options) + defaults = default_options.merge(options) + + @failures = [] + @submission = defaults[:submission] || FormSubmission.find(submission_id) + @parent_dir = defaults[:parent_dir] + @include_text_dump = defaults[:include_text_dump] + @include_json_dump = defaults[:include_json_dump] + @quiet_upload_failures = defaults[:quiet_upload_failures] + @quiet_pdf_failures = defaults[:quiet_pdf_failures] + end + + def run + log_info(" - submission id: #{submission.id}") + write + write_as_json_dump if include_json_dump + write_as_text_dump if include_text_dump + write_user_uploads if user_uploads.present? + write_metadata + output_directory_path + rescue => e + if run_quiet + @failures << { id: submission.id, error: e.try(:message) || e } + log_error("failed submission: #{submission.id}") + else + raise e + end + end + + private + + def default_options + { + parent_dir: 'wipn8923-test', + include_text_dump: true, # include the form data as a text file + include_json_dump: true, # include the form data as a JSON object + quiet_upload_failures: true, # will skip problematic user uploads if true + quiet_pdf_failures: true, # will skip the PDF generating if it's not working + run_quiet: true + } + end + + def metadata + @metadata ||= generate_metadata + end + + def output_directory_path + @output_directory_path ||= "#{parent_dir}/#{submission.id}" + end + + def s3_resource + @s3_resource ||= Reports::Uploader.new_s3_resource + end + + def target_bucket + @target_bucket ||= Reports::Uploader.s3_bucket + end + + def form_json + @form_json ||= JSON.parse(submission.form_json)['form'] + end + + # ## + # File Writing Helpers: + def write + submission_create_date = submission.created_at.iso8601 + form_json['form']['claimDate'] ||= submission_create_date + form_json['form']['applicationExpirationDate'] = 365.days.from_now.iso8601 + service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) + response = service.get_form(form_json.to_json) + encoded_pdf = response.body['pdf'] + content = Base64.decode64(encoded_pdf) + object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form.pdf") + object.put(body: content) + rescue => e + if quiet_pdf_failures + write_pdf_error(e) + else + raise e + end + end + + def write_pdf_error(error) + content = if error.present? + "#{error.try(:message)}\n\n#{error.try(:messages).try(:join, "\n\t - ")}" + else + 'unknown failure' + end + rescue + content = 'unknown failure' + ensure + object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/pdf_generating_failure_explanation.txt") + object.put(body: content) + end + + def write_as_json_dump + object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form_text_dump.txt") + content = JSON.pretty_generate(submission.form) + object.put(body: content) + end + + def write_alternative + new_target = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form.pdf") + new_target.upload_file(form_initial_path) + Common::FileHelpers.delete_file_if_exists(form_initial_path) + end + + def write_metadata + path = "#{output_directory_path}/metadata.txt" + object = s3_resource.bucket(target_bucket).object(path) + object.put(body: metadata.to_json) + end + + def write_failure_report + path = "#{output_directory_path}/user_upload_failures.txt" + object = s3_resource.bucket(target_bucket).object(path) + content = JSON.pretty_generate(user_upload_failures) + object.put(body: content) + end + + def write_as_text_dump + path = "#{output_directory_path}/form_text_dump.txt" + object = s3_resource.bucket(target_bucket).object(path) + object.put(body: form_text_dump.to_json) + end + + def form_text_dump + @form_text_dump ||= generate_form_text_dump + end + + def generate_form_text_dump + form = submission.form + return form if form['form'].blank? + + form['form']['claimDate'] ||= submission.created_at.iso8601 + form + end + + def user_upload_path + @user_upload_path ||= "#{output_directory_path}/user_uploads" + end + + def user_uploads + @user_uploads ||= submission.form['form_uploads'] + end + + def user_upload_failures + @user_upload_failures ||= [] + end + + # ## + # User Upload Processing: + def write_user_uploads + log_info(" Moving #{user_uploads.count} user uploads:") + user_uploads.each do |upload| + write_user_upload upload + rescue => e + if quiet_upload_failures + user_upload_failures << { + filename: upload['name'], + confirmationCode: upload['attachmentId'], + attachmentId: upload['attachmentId'], + error: e.try(:message) || e || 'unknown error' + } + else + raise e + end + end + write_failure_report if user_upload_failures.present? + end + + def write_user_upload(upload_data) + log_info(" - processing upload: #{upload_data['name']} - #{upload_data['confirmationCode']}") + local = SupportingEvidenceAttachment.find_by(guid: upload_data['confirmationCode']) + raise 'No local record found' if local.blank? + + read_bucket = local.get_file.uploader.aws_bucket + aws_path = local.get_file.path + old_obj = s3_resource.bucket(read_bucket).object(aws_path) + new_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{upload_data['name']}") + new_obj.copy_from(old_obj) + end + + # ## + # Metadata Processing: + # create metadata json with + # - vet PII + # - formsIncluded value indicates to the reviewing admin that nothing is missing + # - GUIDs of failed document uploads + def generate_metadata + return {} unless submission.auth_headers.present? && submission.form['form'].present? + + zc = submission.form.dig('form', 'veteran', 'currentMailingAddress') + zipcode = zc.nil? ? '00000' : [zc['zipFirstFive'], zc['zipLastFour']].join('-') + pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] + pii.merge({ + fileNumber: pii['va_eauth_pnid'], + birlsfilenumber: pii['va_eauth_birlsfilenumber'], + zipCode: zipcode, + claimDate: submission.created_at.iso8601, + formsIncluded: map_form_inclusion + }) + end + + def map_form_inclusion + %w[form1 form2].select { |type| submission.form[type].present? } + end + + def log_info(message, **details) + Rails.logger.info(message, details) + end + + def log_error(message, error, **details) + Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) + end +end From 3309a0e8a54ac608eb28de7bef0b6cff93308c28 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 17:54:37 -0400 Subject: [PATCH 06/38] refine DumpSubmissionToPdf class further --- .../s3/dump_submission_to_pdf.rb | 245 ++++++++---------- 1 file changed, 103 insertions(+), 142 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb index e409210fc21..c81b9425f35 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb @@ -20,14 +20,15 @@ # ids.each { |id| DumpSubmissionToPdf.new(submission_id: id, parent_dir:).run } # this will just put each submission in a folder by it's id under the parent dir class DumpSubmissionToPdf - attr_accessor :submission, :parent_dir, :failed_uploads, :include_text_dump, - :quiet_upload_failures, :quiet_pdf_failures, :include_json_dump, :run_quiet + attr_reader :failures, :form_id, :include_json_dump, :include_text_dump, + :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :submission - def initialize(submission_id: nil, submission: nil, **options) + def initialize(form_id: nil, submission_id: nil, submission: nil, **options) defaults = default_options.merge(options) @failures = [] - @submission = defaults[:submission] || FormSubmission.find(submission_id) + @form_id = form_id + @submission = submission || FormSubmission.find(submission_id) @parent_dir = defaults[:parent_dir] @include_text_dump = defaults[:include_text_dump] @include_json_dump = defaults[:include_json_dump] @@ -36,206 +37,166 @@ def initialize(submission_id: nil, submission: nil, **options) end def run - log_info(" - submission id: #{submission.id}") - write - write_as_json_dump if include_json_dump - write_as_text_dump if include_text_dump - write_user_uploads if user_uploads.present? - write_metadata + log_info("Processing submission ID: #{submission.id}") + process_submission_files output_directory_path - rescue => e - if run_quiet - @failures << { id: submission.id, error: e.try(:message) || e } - log_error("failed submission: #{submission.id}") - else - raise e - end + rescue StandardError => e + handle_run_error(e) end private def default_options { - parent_dir: 'wipn8923-test', - include_text_dump: true, # include the form data as a text file include_json_dump: true, # include the form data as a JSON object - quiet_upload_failures: true, # will skip problematic user uploads if true - quiet_pdf_failures: true, # will skip the PDF generating if it's not working + include_text_dump: true, # include the form data as a text file + parent_dir: 'wipn8923-test', + quiet_pdf_failures: true, # skip PDF generation silently + quiet_upload_failures: true, # skip problematic uploads silently run_quiet: true } end - def metadata - @metadata ||= generate_metadata - end - - def output_directory_path - @output_directory_path ||= "#{parent_dir}/#{submission.id}" + def process_submission_files + write_pdf + write_as_json_dump if include_json_dump + write_as_text_dump if include_text_dump + write_user_uploads if user_uploads.present? + write_metadata end - def s3_resource - @s3_resource ||= Reports::Uploader.new_s3_resource - end + def handle_run_error(error) + raise error unless default_options[:run_quiet] - def target_bucket - @target_bucket ||= Reports::Uploader.s3_bucket + failures << { id: submission.id, error: error.message } + log_error("Failed submission: #{submission.id}", error) end - def form_json - @form_json ||= JSON.parse(submission.form_json)['form'] + def write_pdf + encoded_pdf = generate_pdf_content + save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) + rescue StandardError => e + quiet_pdf_failures ? write_pdf_error(e) : raise(e) end - # ## - # File Writing Helpers: - def write - submission_create_date = submission.created_at.iso8601 - form_json['form']['claimDate'] ||= submission_create_date - form_json['form']['applicationExpirationDate'] = 365.days.from_now.iso8601 + def generate_pdf_content service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) - response = service.get_form(form_json.to_json) - encoded_pdf = response.body['pdf'] - content = Base64.decode64(encoded_pdf) - object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form.pdf") - object.put(body: content) - rescue => e - if quiet_pdf_failures - write_pdf_error(e) - else - raise e - end + service.get_form(form_json.to_json).body['pdf'] end def write_pdf_error(error) - content = if error.present? - "#{error.try(:message)}\n\n#{error.try(:messages).try(:join, "\n\t - ")}" - else - 'unknown failure' - end - rescue - content = 'unknown failure' - ensure - object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/pdf_generating_failure_explanation.txt") - object.put(body: content) + log_error("PDF generation failed for submission: #{submission.id}", error) + save_file_to_s3("#{output_directory_path}/pdf_generating_failure.txt", error_details(error)) + end + + def error_details(error) + "#{error.message}\n\n#{error.backtrace.join("\n")}" end def write_as_json_dump - object = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form_text_dump.txt") - content = JSON.pretty_generate(submission.form) - object.put(body: content) + save_file_to_s3("#{output_directory_path}/form_text_dump.json", JSON.pretty_generate(form_json)) end - def write_alternative - new_target = s3_resource.bucket(target_bucket).object("#{output_directory_path}/form.pdf") - new_target.upload_file(form_initial_path) - Common::FileHelpers.delete_file_if_exists(form_initial_path) + def write_as_text_dump + save_file_to_s3("#{output_directory_path}/form_text_dump.txt", form_text_dump.to_json) end def write_metadata - path = "#{output_directory_path}/metadata.txt" - object = s3_resource.bucket(target_bucket).object(path) - object.put(body: metadata.to_json) + save_file_to_s3("#{output_directory_path}/metadata.json", metadata.to_json) end - def write_failure_report - path = "#{output_directory_path}/user_upload_failures.txt" - object = s3_resource.bucket(target_bucket).object(path) - content = JSON.pretty_generate(user_upload_failures) - object.put(body: content) + def write_user_uploads + log_info("Moving #{user_uploads.count} user uploads") + user_uploads.each { |upload| process_user_upload(upload) } + write_failure_report if user_upload_failures.present? + rescue StandardError => e + handle_upload_error(e) end - def write_as_text_dump - path = "#{output_directory_path}/form_text_dump.txt" - object = s3_resource.bucket(target_bucket).object(path) - object.put(body: form_text_dump.to_json) + def process_user_upload(upload) + log_info("Processing upload: #{upload['name']} - #{upload['confirmationCode']}") + local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) + raise 'Local record not found' unless local_file + + copy_file_between_buckets(local_file) end - def form_text_dump - @form_text_dump ||= generate_form_text_dump + def copy_file_between_buckets(local_file) + source_obj = s3_resource.bucket(local_file.get_file.uploader.aws_bucket).object(local_file.get_file.path) + target_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{local_file.get_file.filename}") + target_obj.copy_from(source_obj) end - def generate_form_text_dump - form = submission.form - return form if form['form'].blank? + def write_failure_report + save_file_to_s3("#{output_directory_path}/user_upload_failures.txt", JSON.pretty_generate(user_upload_failures)) + end - form['form']['claimDate'] ||= submission.created_at.iso8601 - form + def save_file_to_s3(path, content) + s3_resource.bucket(target_bucket).object(path).put(body: content) end - def user_upload_path - @user_upload_path ||= "#{output_directory_path}/user_uploads" + def s3_resource + @s3_resource ||= Reports::Uploader.new_s3_resource end - def user_uploads - @user_uploads ||= submission.form['form_uploads'] + def target_bucket + @target_bucket ||= Reports::Uploader.s3_bucket end - def user_upload_failures - @user_upload_failures ||= [] + def form_json + @form_json ||= JSON.parse(submission.form_json)[form_id] end - # ## - # User Upload Processing: - def write_user_uploads - log_info(" Moving #{user_uploads.count} user uploads:") - user_uploads.each do |upload| - write_user_upload upload - rescue => e - if quiet_upload_failures - user_upload_failures << { - filename: upload['name'], - confirmationCode: upload['attachmentId'], - attachmentId: upload['attachmentId'], - error: e.try(:message) || e || 'unknown error' - } - else - raise e - end - end - write_failure_report if user_upload_failures.present? + def form_text_dump + form = submission.form + form[form_id]['claimDate'] ||= submission.created_at.iso8601 + form end - def write_user_upload(upload_data) - log_info(" - processing upload: #{upload_data['name']} - #{upload_data['confirmationCode']}") - local = SupportingEvidenceAttachment.find_by(guid: upload_data['confirmationCode']) - raise 'No local record found' if local.blank? + def metadata + return {} unless submission.auth_headers.present? && submission.form[form_id].present? - read_bucket = local.get_file.uploader.aws_bucket - aws_path = local.get_file.path - old_obj = s3_resource.bucket(read_bucket).object(aws_path) - new_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{upload_data['name']}") - new_obj.copy_from(old_obj) + extract_metadata_from_submission end - # ## - # Metadata Processing: - # create metadata json with - # - vet PII - # - formsIncluded value indicates to the reviewing admin that nothing is missing - # - GUIDs of failed document uploads - def generate_metadata - return {} unless submission.auth_headers.present? && submission.form['form'].present? - - zc = submission.form.dig('form', 'veteran', 'currentMailingAddress') - zipcode = zc.nil? ? '00000' : [zc['zipFirstFive'], zc['zipLastFour']].join('-') + def extract_metadata_from_submission + address = submission.form.dig(form_id, 'veteran', 'currentMailingAddress') + zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] pii.merge({ - fileNumber: pii['va_eauth_pnid'], - birlsfilenumber: pii['va_eauth_birlsfilenumber'], - zipCode: zipcode, - claimDate: submission.created_at.iso8601, - formsIncluded: map_form_inclusion - }) + fileNumber: pii['va_eauth_pnid'], + zipCode: zip || '00000', + claimDate: submission.created_at.iso8601, + formsIncluded: map_form_inclusion + }) end def map_form_inclusion %w[form1 form2].select { |type| submission.form[type].present? } end - def log_info(message, **details) - Rails.logger.info(message, details) + def log_info(message) + Rails.logger.info(message) end - def log_error(message, error, **details) - Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) + def log_error(message, error) + Rails.logger.error("#{message}: #{error.message}") + end + + def output_directory_path + @output_directory_path ||= "#{parent_dir}/#{submission.id}" + end + + def user_uploads + @user_uploads ||= submission.form['form_uploads'] + end + + def user_upload_failures + @user_upload_failures ||= [] + end + + def user_upload_path + @user_upload_path ||= "#{output_directory_path}/user_uploads" end end + From 83376ea1e33dd7b736a3bd3ca7828d7548baa789 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 17:57:54 -0400 Subject: [PATCH 07/38] add existing version of UserSubmissionDumpBuilder class --- .../s3/dump_submission_to_pdf.rb | 17 +++-- .../s3/user_submission_dump_builder.rb | 63 +++++++++++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb index c81b9425f35..12f02937c05 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb @@ -40,7 +40,7 @@ def run log_info("Processing submission ID: #{submission.id}") process_submission_files output_directory_path - rescue StandardError => e + rescue => e handle_run_error(e) end @@ -75,7 +75,7 @@ def handle_run_error(error) def write_pdf encoded_pdf = generate_pdf_content save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) - rescue StandardError => e + rescue => e quiet_pdf_failures ? write_pdf_error(e) : raise(e) end @@ -109,7 +109,7 @@ def write_user_uploads log_info("Moving #{user_uploads.count} user uploads") user_uploads.each { |upload| process_user_upload(upload) } write_failure_report if user_upload_failures.present? - rescue StandardError => e + rescue => e handle_upload_error(e) end @@ -164,11 +164,11 @@ def extract_metadata_from_submission zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] pii.merge({ - fileNumber: pii['va_eauth_pnid'], - zipCode: zip || '00000', - claimDate: submission.created_at.iso8601, - formsIncluded: map_form_inclusion - }) + fileNumber: pii['va_eauth_pnid'], + zipCode: zip || '00000', + claimDate: submission.created_at.iso8601, + formsIncluded: map_form_inclusion + }) end def map_form_inclusion @@ -199,4 +199,3 @@ def user_upload_path @user_upload_path ||= "#{output_directory_path}/user_uploads" end end - diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb new file mode 100644 index 00000000000..a4758a39e5d --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +class UserSubmissionDumpBuilder + attr_reader :uuid, :user_dir, :links, :submission_ids + + def initialize(uuid:, submission_ids:, parent_dir: 'wipn8923-test') + @submission_ids = submission_ids + @uuid = uuid + @user_dir = "#{parent_dir}/#{uuid}" + @links = [] + end + + def run + write_user_submissions + # write_dedupe_files + user_dir + end + + def write_user_submissions + submissions.each do |submission| + DumpSubmissionToPdf.new(submission:, parent_dir: user_dir).run + end + end + + def submissions + @submissions ||= Form526Submission.where(id: submission_ids) + end + + def write_dedupe_files + content = "The following mismatched form data was identified for user (uuid): #{uuid}\n" + if dedupe_report_for_user.blank? + content << "\nNo variations in users submissions! \n" + else + dedupe_report_for_user.each do |key_chain, diff| + next if diff.blank? + + content << "\tnested under form keys #{key_chain.join(' -> ')}...\n" + diff.each do |value, submission_ids| + content << "\t\tthese submissions: #{submission_ids.join(', ')}\n" + content << "\t\t\thave a value of: '#{value}'\n" + end + end + end + s3_resource.bucket(target_bucket) + .object("#{user_dir}/duplicate_report_pretty.txt") + .put(body: content) + s3_resource.bucket(target_bucket) + .object("#{user_dir}/duplicate_report.json") + .put(body: dedupe_report_for_user.to_json) + end + + def dedupe_report_for_user + @dedupe_report_for_user ||= SubmissionDuplicateReport.new(submission_ids:).run[uuid] + end + + def s3_resource + @s3_resource ||= Reports::Uploader.new_s3_resource + end + + def target_bucket + @target_bucket ||= Reports::Uploader.s3_bucket + end +end From 45a9517de9bec293f89354b1f6245f79e2233723 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 5 Sep 2024 18:06:26 -0400 Subject: [PATCH 08/38] refine UserSubmissionDumpBuilder class further --- .../s3/user_submission_dump_builder.rb | 65 ++++++++++--------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb index a4758a39e5d..4b4aef78de0 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb @@ -1,56 +1,51 @@ # frozen_string_literal: true class UserSubmissionDumpBuilder - attr_reader :uuid, :user_dir, :links, :submission_ids + attr_reader :uuid, :user_dir, :submission_ids def initialize(uuid:, submission_ids:, parent_dir: 'wipn8923-test') @submission_ids = submission_ids @uuid = uuid - @user_dir = "#{parent_dir}/#{uuid}" - @links = [] + @user_dir = build_user_directory(parent_dir) end def run + log_info("Starting dump for user: #{uuid}, Submissions: #{submission_ids}") write_user_submissions - # write_dedupe_files + log_info("Dump completed for user: #{uuid}") user_dir + rescue => e + log_error("Error in dump process for user: #{uuid}", e) + raise e + end + + private + + def build_user_directory(parent_dir) + "#{parent_dir}/#{uuid}" end def write_user_submissions submissions.each do |submission| - DumpSubmissionToPdf.new(submission:, parent_dir: user_dir).run + dump_submission(submission) + rescue => e + log_error("Failed to dump submission: #{submission.id} for user: #{uuid}", e) end end + def dump_submission(submission) + log_info("Processing submission: #{submission.id}") + DumpSubmissionToPdf.new(submission:, parent_dir: user_dir).run + end + def submissions - @submissions ||= Form526Submission.where(id: submission_ids) - end - - def write_dedupe_files - content = "The following mismatched form data was identified for user (uuid): #{uuid}\n" - if dedupe_report_for_user.blank? - content << "\nNo variations in users submissions! \n" - else - dedupe_report_for_user.each do |key_chain, diff| - next if diff.blank? - - content << "\tnested under form keys #{key_chain.join(' -> ')}...\n" - diff.each do |value, submission_ids| - content << "\t\tthese submissions: #{submission_ids.join(', ')}\n" - content << "\t\t\thave a value of: '#{value}'\n" - end - end - end - s3_resource.bucket(target_bucket) - .object("#{user_dir}/duplicate_report_pretty.txt") - .put(body: content) - s3_resource.bucket(target_bucket) - .object("#{user_dir}/duplicate_report.json") - .put(body: dedupe_report_for_user.to_json) + @submissions ||= fetch_submissions end - def dedupe_report_for_user - @dedupe_report_for_user ||= SubmissionDuplicateReport.new(submission_ids:).run[uuid] + def fetch_submissions + FormSubmission.where(id: submission_ids).tap do |subs| + log_info("Fetched #{subs.count} submissions for user: #{uuid}") + end end def s3_resource @@ -60,4 +55,12 @@ def s3_resource def target_bucket @target_bucket ||= Reports::Uploader.s3_bucket end + + def log_info(message) + Rails.logger.info(message) + end + + def log_error(message, error) + Rails.logger.error("#{message}. Error: #{error.message}") + end end From 6359235a210318c6706037cfd5b69832bf2afab8 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 10:59:02 -0400 Subject: [PATCH 09/38] further refinement of shared logic and naming --- .../s3/submission_dump_handler.rb | 110 ------------------ .../s3/user_submission_dump_builder.rb | 66 ----------- .../archive_submission_to_pdf.rb} | 59 +++++----- .../s3_service/submission_archive_handler.rb | 99 ++++++++++++++++ .../user_submission_archive_handler.rb | 54 +++++++++ .../simple_forms_api/s3_service/utils.rb | 25 ++++ 6 files changed, 207 insertions(+), 206 deletions(-) delete mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb delete mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb rename modules/simple_forms_api/app/services/simple_forms_api/{s3/dump_submission_to_pdf.rb => s3_service/archive_submission_to_pdf.rb} (77%) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb deleted file mode 100644 index 04c5b460795..00000000000 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_dump_handler.rb +++ /dev/null @@ -1,110 +0,0 @@ -# frozen_string_literal: true - -class SubmissionDumpHandler - attr_reader :submission_ids, :parent_dir, :successes, :failures, - :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures - - def initialize(submission_ids:, **options) - defaults = default_options.merge(options) - - @submission_ids = submission_ids - @parent_dir = defaults[:parent_dir] - @bundle_by_user = defaults[:bundle_by_user] - @run_quiet = defaults[:run_quiet] - @quiet_upload_failures = defaults[:quiet_upload_failures] - @quiet_pdf_failures = defaults[:quiet_pdf_failures] - @failures = [] - end - - def run - bundle_by_user ? process_by_user : process_individual_submissions - cleanup_tmp_files - parent_dir - end - - private - - def default_options - { - parent_dir: 'wipn8923-test', - bundle_by_user: true, - run_quiet: true, # silence but record errors until the end - quiet_upload_failures: false, # granular control over how user processing raises errors - quiet_pdf_failures: false # granular control over how user processing raises errors - } - end - - def submissions - @submissions ||= FormSubmission.where(id: submission_ids) - end - - def submissions_by_uuid - @submissions_by_uuid ||= group_submissions_by_uuid - end - - # Group submissions by user_uuid for easier bundling - def group_submissions_by_uuid - submissions.group_by(&:user_uuid).transform_values do |user_submissions| - user_submissions.map(&:id) - end - end - - def process_by_user - submissions_by_uuid.each do |uuid, submission_ids| - log_info("Processing for user: #{uuid} with #{submission_ids.size} submission(s)", uuid:, submission_ids:) - process_user_submissions(uuid, submission_ids) - end - end - - def process_individual_submissions - submissions.each_with_index do |sub, idx| - log_info("Processing submission: #{sub.id} (non-grouped) ##{idx + 1} of #{submissions.count} total submissions", - submission_id: sub.id, submission_count: submissions.count) - process_submission(sub.id) - end - end - - def process_user_submissions(uuid, submission_ids) - UserSubmissionDumpBuilder.new(uuid:, submission_ids:, parent_dir:).run - rescue => e - handle_error("User failure: #{uuid}", e, uuid:) - end - - def process_submission(submission_id) - DumpSubmissionToPdf.new( - submission_id:, - parent_dir:, - quiet_pdf_failures:, - quiet_upload_failures: - ).run - rescue => e - handle_error("Submission failure: #{submission_id}", e, submission_id:) - end - - def handle_error(message, error, context) - raise unless run_quiet - - log_error(message, error, context) - failures << { context => error } - end - - def cleanup_tmp_files - system('rm -f tmp/* > /dev/null 2>&1') - end - - def log_info(message, **details) - Rails.logger.info(message, details) - end - - def log_error(message, error, **details) - Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) - end - - def s3_resource - @s3_resource ||= Reports::Uploader.new_s3_resource - end - - def target_bucket - @target_bucket ||= Reports::Uploader.s3_bucket - end -end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb deleted file mode 100644 index 4b4aef78de0..00000000000 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/user_submission_dump_builder.rb +++ /dev/null @@ -1,66 +0,0 @@ -# frozen_string_literal: true - -class UserSubmissionDumpBuilder - attr_reader :uuid, :user_dir, :submission_ids - - def initialize(uuid:, submission_ids:, parent_dir: 'wipn8923-test') - @submission_ids = submission_ids - @uuid = uuid - @user_dir = build_user_directory(parent_dir) - end - - def run - log_info("Starting dump for user: #{uuid}, Submissions: #{submission_ids}") - write_user_submissions - log_info("Dump completed for user: #{uuid}") - user_dir - rescue => e - log_error("Error in dump process for user: #{uuid}", e) - raise e - end - - private - - def build_user_directory(parent_dir) - "#{parent_dir}/#{uuid}" - end - - def write_user_submissions - submissions.each do |submission| - dump_submission(submission) - rescue => e - log_error("Failed to dump submission: #{submission.id} for user: #{uuid}", e) - end - end - - def dump_submission(submission) - log_info("Processing submission: #{submission.id}") - DumpSubmissionToPdf.new(submission:, parent_dir: user_dir).run - end - - def submissions - @submissions ||= fetch_submissions - end - - def fetch_submissions - FormSubmission.where(id: submission_ids).tap do |subs| - log_info("Fetched #{subs.count} submissions for user: #{uuid}") - end - end - - def s3_resource - @s3_resource ||= Reports::Uploader.new_s3_resource - end - - def target_bucket - @target_bucket ||= Reports::Uploader.s3_bucket - end - - def log_info(message) - Rails.logger.info(message) - end - - def log_error(message, error) - Rails.logger.error("#{message}. Error: #{error.message}") - end -end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb similarity index 77% rename from modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index 12f02937c05..d5159eb9dc6 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/dump_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -1,27 +1,33 @@ # frozen_string_literal: true # To use -# ids = -# parent_dir = +# ids = +# parent_dir = # -# to see your dump in s3 +# to see your archive in s3 # 1. go here https://console.amazonaws-us-gov.com/s3/home?region=us-gov-west-1# # 2. login with 2fa # 3. search for dsva-vetsgov-prod-reports -# 4. search for your parent_dir name, e.g. 526dump_aug_21st_2024 +# 4. search for your parent_dir name, e.g. 526archive_aug_21st_2024 # # If you do not provide a parent_dir, the script defaults to a folder called wipn8923-test # # OPTION 1: Run the script with user groupings # - requires SubmissionDuplicateReport object -# - SubmissionDumpHandler.new(submission_ids: ids, parent_dir:).run +# - SubmissionArchiveHandler.new(submission_ids: ids, parent_dir:).run # # OPTION 2: Run without user groupings -# ids.each { |id| DumpSubmissionToPdf.new(submission_id: id, parent_dir:).run } +# ids.each { |id| ArchiveSubmissionToPdf.new(submission_id: id, parent_dir:).run } # this will just put each submission in a folder by it's id under the parent dir -class DumpSubmissionToPdf - attr_reader :failures, :form_id, :include_json_dump, :include_text_dump, - :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :submission +class ArchiveSubmissionToPdf + attr_reader :failures, :form_id, :include_json_archive, :include_text_archive, + :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, + :submission + + VALID_VFF_FORMS = %w[ + 20-10206 20-10207 21-0845 21-0966 21-0972 21-10210 + 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 + ].freeze def initialize(form_id: nil, submission_id: nil, submission: nil, **options) defaults = default_options.merge(options) @@ -30,10 +36,11 @@ def initialize(form_id: nil, submission_id: nil, submission: nil, **options) @form_id = form_id @submission = submission || FormSubmission.find(submission_id) @parent_dir = defaults[:parent_dir] - @include_text_dump = defaults[:include_text_dump] - @include_json_dump = defaults[:include_json_dump] + @include_text_archive = defaults[:include_text_archive] + @include_json_archive = defaults[:include_json_archive] @quiet_upload_failures = defaults[:quiet_upload_failures] @quiet_pdf_failures = defaults[:quiet_pdf_failures] + @run_quiet = defaults[:run_quiet] end def run @@ -48,8 +55,8 @@ def run def default_options { - include_json_dump: true, # include the form data as a JSON object - include_text_dump: true, # include the form data as a text file + include_json_archive: true, # include the form data as a JSON object + include_text_archive: true, # include the form data as a text file parent_dir: 'wipn8923-test', quiet_pdf_failures: true, # skip PDF generation silently quiet_upload_failures: true, # skip problematic uploads silently @@ -59,14 +66,14 @@ def default_options def process_submission_files write_pdf - write_as_json_dump if include_json_dump - write_as_text_dump if include_text_dump + write_as_json_archive if include_json_archive + write_as_text_archive if include_text_archive write_user_uploads if user_uploads.present? write_metadata end def handle_run_error(error) - raise error unless default_options[:run_quiet] + raise error unless run_quiet failures << { id: submission.id, error: error.message } log_error("Failed submission: #{submission.id}", error) @@ -93,12 +100,12 @@ def error_details(error) "#{error.message}\n\n#{error.backtrace.join("\n")}" end - def write_as_json_dump - save_file_to_s3("#{output_directory_path}/form_text_dump.json", JSON.pretty_generate(form_json)) + def write_as_json_archive + save_file_to_s3("#{output_directory_path}/form_text_archive.json", JSON.pretty_generate(form_json)) end - def write_as_text_dump - save_file_to_s3("#{output_directory_path}/form_text_dump.txt", form_text_dump.to_json) + def write_as_text_archive + save_file_to_s3("#{output_directory_path}/form_text_archive.txt", form_text_archive.to_json) end def write_metadata @@ -147,7 +154,7 @@ def form_json @form_json ||= JSON.parse(submission.form_json)[form_id] end - def form_text_dump + def form_text_archive form = submission.form form[form_id]['claimDate'] ||= submission.created_at.iso8601 form @@ -172,15 +179,7 @@ def extract_metadata_from_submission end def map_form_inclusion - %w[form1 form2].select { |type| submission.form[type].present? } - end - - def log_info(message) - Rails.logger.info(message) - end - - def log_error(message, error) - Rails.logger.error("#{message}: #{error.message}") + VALID_VFF_FORMS.select { |type| submission.form[type].present? } end def output_directory_path diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb new file mode 100644 index 00000000000..594d521f037 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -0,0 +1,99 @@ +# frozen_string_literal: true + +module SimpleFormsApi + module S3Service + class SubmissionArchiveHandler < SimpleFormsApi::S3Service::Utils + attr_reader :submission_ids, :parent_dir, :successes, :failures, + :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures + + def initialize(submission_ids:, **options) + defaults = default_options.merge(options) + + @submission_ids = submission_ids + @parent_dir = defaults[:parent_dir] + @bundle_by_user = defaults[:bundle_by_user] + @run_quiet = defaults[:run_quiet] + @quiet_upload_failures = defaults[:quiet_upload_failures] + @quiet_pdf_failures = defaults[:quiet_pdf_failures] + @failures = [] + end + + def run + bundle_by_user ? process_by_user : process_individual_submissions + cleanup_tmp_files + parent_dir + end + + private + + def default_options + { + bundle_by_user: true, + parent_dir: 'wipn8923-test', + quiet_pdf_failures: false, # granular control over how user processing raises errors + quiet_upload_failures: false, # granular control over how user processing raises errors + run_quiet: true # silence but record errors until the end + } + end + + def submissions + @submissions ||= FormSubmission.where(id: submission_ids) + end + + def submissions_by_uuid + @submissions_by_uuid ||= group_submissions_by_uuid + end + + def group_submissions_by_uuid + submissions.group_by(&:user_uuid).transform_values do |user_submissions| + user_submissions.map(&:id) + end + end + + def process_by_user + submissions_by_uuid.each do |uuid, submission_ids| + log_info("Processing for user: #{uuid} with #{submission_ids.size} submission(s)", uuid:, submission_ids:) + process_user_submissions(uuid, submission_ids) + end + end + + def process_individual_submissions + submissions.each_with_index do |sub, idx| + log_info( + "Processing submission: #{sub.id} (non-grouped) ##{idx + 1} of #{submissions.count} total submissions", + submission_id: sub.id, submission_count: submissions.count + ) + process_submission(sub.id) + end + end + + def process_user_submissions(uuid, submission_ids) + UserSubmissionArchiveHandler.new(uuid:, submission_ids:, parent_dir:).run + rescue => e + handle_error("User failure: #{uuid}", e, uuid:) + end + + def process_submission(submission_id) + ArchiveSubmissionToPdf.new( + submission_id:, + parent_dir:, + quiet_pdf_failures:, + quiet_upload_failures: + ).run + rescue => e + handle_error("Submission failure: #{submission_id}", e, submission_id:) + end + + def handle_error(message, error, context) + raise unless run_quiet + + log_error(message, error, context) + failures << { context => error } + end + + def cleanup_tmp_files + system('rm -f tmp/* > /dev/null 2>&1') + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb new file mode 100644 index 00000000000..1b463a5e65a --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module SimpleFormsApi + module S3Service + class UserSubmissionArchiveHandler < SimpleFormsApi::S3Service::Utils + attr_reader :uuid, :user_dir, :submission_ids + + def initialize(uuid:, submission_ids:, parent_dir: 'wipn8923-test') + @submission_ids = submission_ids + @uuid = uuid + @user_dir = build_user_directory(parent_dir) + end + + def run + log_info("Starting archive for user: #{uuid}, Submissions: #{submission_ids}") + write_user_submissions + log_info("Archive completed for user: #{uuid}") + user_dir + rescue => e + log_error("Error in archive process for user: #{uuid}", e) + raise e + end + + private + + def build_user_directory(parent_dir) + "#{parent_dir}/#{uuid}" + end + + def write_user_submissions + submissions.each do |submission| + archive_submission(submission) + rescue => e + log_error("Failed to archive submission: #{submission.id} for user: #{uuid}", e) + end + end + + def archive_submission(submission) + log_info("Processing submission: #{submission.id}") + ArchiveSubmissionToPdf.new(submission:, parent_dir: user_dir).run + end + + def submissions + @submissions ||= fetch_submissions + end + + def fetch_submissions + FormSubmission.where(id: submission_ids).tap do |subs| + log_info("Fetched #{subs.count} submissions for user: #{uuid}") + end + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb new file mode 100644 index 00000000000..89378cc0331 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb @@ -0,0 +1,25 @@ +# frozen_string_literal: true + +module SimpleFormsApi + module S3Service + class Utils + private + + def log_info(message, **details) + Rails.logger.info(message, details) + end + + def log_error(message, error, **details) + Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) + end + + def s3_resource + @s3_resource ||= Reports::Uploader.new_s3_resource + end + + def target_bucket + @target_bucket ||= Reports::Uploader.s3_bucket + end + end + end +end From 56676ae0fc2d3073d88912c34d0754c0705df963 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 10:59:42 -0400 Subject: [PATCH 10/38] add Sidekiq job to handle running script --- .../submission_archive_handler_job.rb | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb new file mode 100644 index 00000000000..1ea5d043316 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module SimpleFormsApi + module S3Service + class SubmissionArchiveHandlerJob < SimpleFormsApi::S3Service::Utils + include Sidekiq::Worker + + sidekiq_options retry: 3, queue: 'default' + + def perform(submission_ids:, **options) + defaults = default_options.merge(options) + + runner = SubmissionArchiveHandler.new(submission_ids:, **defaults) + result_dir = runner.run + log_info("Job completed successfully. Results saved in directory: #{result_dir}") + rescue => e + handle_job_error(e) + end + + private + + def default_options + { + bundle_by_user: true, + parent_dir: 'wipn8923-test', + quiet_pdf_failures: false, + quiet_upload_failures: false, + run_quiet: true, + signed_link: false + } + end + + def handle_job_error(error) + log_error('SubmissionArchiveHandlerJob failed.', error) + raise error + end + end + end +end From cab5b3820b3be54cdf4541241dc476eef4fbe364 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 11:22:41 -0400 Subject: [PATCH 11/38] minor tweaks --- .../s3_service/archive_submission_to_pdf.rb | 350 +++++++++--------- 1 file changed, 173 insertions(+), 177 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index d5159eb9dc6..9501cbd1c7e 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -19,182 +19,178 @@ # OPTION 2: Run without user groupings # ids.each { |id| ArchiveSubmissionToPdf.new(submission_id: id, parent_dir:).run } # this will just put each submission in a folder by it's id under the parent dir -class ArchiveSubmissionToPdf - attr_reader :failures, :form_id, :include_json_archive, :include_text_archive, - :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, - :submission - - VALID_VFF_FORMS = %w[ - 20-10206 20-10207 21-0845 21-0966 21-0972 21-10210 - 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 - ].freeze - - def initialize(form_id: nil, submission_id: nil, submission: nil, **options) - defaults = default_options.merge(options) - - @failures = [] - @form_id = form_id - @submission = submission || FormSubmission.find(submission_id) - @parent_dir = defaults[:parent_dir] - @include_text_archive = defaults[:include_text_archive] - @include_json_archive = defaults[:include_json_archive] - @quiet_upload_failures = defaults[:quiet_upload_failures] - @quiet_pdf_failures = defaults[:quiet_pdf_failures] - @run_quiet = defaults[:run_quiet] - end - - def run - log_info("Processing submission ID: #{submission.id}") - process_submission_files - output_directory_path - rescue => e - handle_run_error(e) - end - - private - - def default_options - { - include_json_archive: true, # include the form data as a JSON object - include_text_archive: true, # include the form data as a text file - parent_dir: 'wipn8923-test', - quiet_pdf_failures: true, # skip PDF generation silently - quiet_upload_failures: true, # skip problematic uploads silently - run_quiet: true - } - end - - def process_submission_files - write_pdf - write_as_json_archive if include_json_archive - write_as_text_archive if include_text_archive - write_user_uploads if user_uploads.present? - write_metadata - end - - def handle_run_error(error) - raise error unless run_quiet - - failures << { id: submission.id, error: error.message } - log_error("Failed submission: #{submission.id}", error) - end - - def write_pdf - encoded_pdf = generate_pdf_content - save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) - rescue => e - quiet_pdf_failures ? write_pdf_error(e) : raise(e) - end - - def generate_pdf_content - service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) - service.get_form(form_json.to_json).body['pdf'] - end - - def write_pdf_error(error) - log_error("PDF generation failed for submission: #{submission.id}", error) - save_file_to_s3("#{output_directory_path}/pdf_generating_failure.txt", error_details(error)) - end - - def error_details(error) - "#{error.message}\n\n#{error.backtrace.join("\n")}" - end - - def write_as_json_archive - save_file_to_s3("#{output_directory_path}/form_text_archive.json", JSON.pretty_generate(form_json)) - end - - def write_as_text_archive - save_file_to_s3("#{output_directory_path}/form_text_archive.txt", form_text_archive.to_json) - end - - def write_metadata - save_file_to_s3("#{output_directory_path}/metadata.json", metadata.to_json) - end - - def write_user_uploads - log_info("Moving #{user_uploads.count} user uploads") - user_uploads.each { |upload| process_user_upload(upload) } - write_failure_report if user_upload_failures.present? - rescue => e - handle_upload_error(e) - end - - def process_user_upload(upload) - log_info("Processing upload: #{upload['name']} - #{upload['confirmationCode']}") - local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) - raise 'Local record not found' unless local_file - - copy_file_between_buckets(local_file) - end - - def copy_file_between_buckets(local_file) - source_obj = s3_resource.bucket(local_file.get_file.uploader.aws_bucket).object(local_file.get_file.path) - target_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{local_file.get_file.filename}") - target_obj.copy_from(source_obj) - end - - def write_failure_report - save_file_to_s3("#{output_directory_path}/user_upload_failures.txt", JSON.pretty_generate(user_upload_failures)) - end - - def save_file_to_s3(path, content) - s3_resource.bucket(target_bucket).object(path).put(body: content) - end - - def s3_resource - @s3_resource ||= Reports::Uploader.new_s3_resource - end - - def target_bucket - @target_bucket ||= Reports::Uploader.s3_bucket - end - - def form_json - @form_json ||= JSON.parse(submission.form_json)[form_id] - end - - def form_text_archive - form = submission.form - form[form_id]['claimDate'] ||= submission.created_at.iso8601 - form - end - - def metadata - return {} unless submission.auth_headers.present? && submission.form[form_id].present? - - extract_metadata_from_submission - end - - def extract_metadata_from_submission - address = submission.form.dig(form_id, 'veteran', 'currentMailingAddress') - zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? - pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] - pii.merge({ - fileNumber: pii['va_eauth_pnid'], - zipCode: zip || '00000', - claimDate: submission.created_at.iso8601, - formsIncluded: map_form_inclusion - }) - end - - def map_form_inclusion - VALID_VFF_FORMS.select { |type| submission.form[type].present? } - end - - def output_directory_path - @output_directory_path ||= "#{parent_dir}/#{submission.id}" - end - - def user_uploads - @user_uploads ||= submission.form['form_uploads'] - end - - def user_upload_failures - @user_upload_failures ||= [] - end - - def user_upload_path - @user_upload_path ||= "#{output_directory_path}/user_uploads" +module SimpleFormsApi + module S3Service + class ArchiveSubmissionToPdf < SimpleFormsApi::S3Service::Utils + attr_reader :failures, :form_id, :include_json_archive, :include_text_archive, + :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, + :submission + + VALID_VFF_FORMS = %w[ + 20-10206 20-10207 21-0845 21-0966 21-0972 21-10210 + 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 + ].freeze + + def initialize(form_id: nil, submission_id: nil, submission: nil, **options) + defaults = default_options.merge(options) + + @failures = [] + @form_id = form_id + @submission = submission || FormSubmission.find(submission_id) + @parent_dir = defaults[:parent_dir] + @include_text_archive = defaults[:include_text_archive] + @include_json_archive = defaults[:include_json_archive] + @quiet_upload_failures = defaults[:quiet_upload_failures] + @quiet_pdf_failures = defaults[:quiet_pdf_failures] + @run_quiet = defaults[:run_quiet] + end + + def run + log_info("Processing submission ID: #{submission.id}") + process_submission_files + output_directory_path + rescue => e + handle_run_error(e) + end + + private + + def default_options + { + include_json_archive: true, # include the form data as a JSON object + include_text_archive: true, # include the form data as a text file + parent_dir: 'wipn8923-test', + quiet_pdf_failures: true, # skip PDF generation silently + quiet_upload_failures: true, # skip problematic uploads silently + run_quiet: true + } + end + + def process_submission_files + write_pdf + write_as_json_archive if include_json_archive + write_as_text_archive if include_text_archive + write_user_uploads if user_uploads.present? + write_metadata + end + + def handle_run_error(error) + raise error unless run_quiet + + failures << { id: submission.id, error: error.message } + log_error("Failed submission: #{submission.id}", error) + end + + def write_pdf + encoded_pdf = generate_pdf_content + save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) + rescue => e + quiet_pdf_failures ? write_pdf_error(e) : raise(e) + end + + def generate_pdf_content + service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) + service.get_form(form_json.to_json).body['pdf'] + end + + def write_pdf_error(error) + log_error("PDF generation failed for submission: #{submission.id}", error) + save_file_to_s3("#{output_directory_path}/pdf_generating_failure.txt", error_details(error)) + end + + def error_details(error) + "#{error.message}\n\n#{error.backtrace.join("\n")}" + end + + def write_as_json_archive + save_file_to_s3("#{output_directory_path}/form_text_archive.json", JSON.pretty_generate(form_json)) + end + + def write_as_text_archive + save_file_to_s3("#{output_directory_path}/form_text_archive.txt", form_text_archive.to_json) + end + + def write_metadata + save_file_to_s3("#{output_directory_path}/metadata.json", metadata.to_json) + end + + def write_user_uploads + log_info("Moving #{user_uploads.count} user uploads") + user_uploads.each { |upload| process_user_upload(upload) } + write_failure_report if user_upload_failures.present? + rescue => e + handle_upload_error(e) + end + + def process_user_upload(upload) + log_info("Processing upload: #{upload['name']} - #{upload['confirmationCode']}") + local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) + raise 'Local record not found' unless local_file + + copy_file_between_buckets(local_file) + end + + def copy_file_between_buckets(local_file) + source_obj = s3_resource.bucket(local_file.get_file.uploader.aws_bucket).object(local_file.get_file.path) + target_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{local_file.get_file.filename}") + target_obj.copy_from(source_obj) + end + + def write_failure_report + save_file_to_s3("#{output_directory_path}/user_upload_failures.txt", JSON.pretty_generate(user_upload_failures)) + end + + def save_file_to_s3(path, content) + s3_resource.bucket(target_bucket).object(path).put(body: content) + end + + def form_json + @form_json ||= JSON.parse(submission.form_json)[form_id] + end + + def form_text_archive + submission.form.tap do |form| + form[form_id]['claimDate'] ||= submission.created_at.iso8601 + end + end + + def metadata + return {} unless submission.auth_headers.present? && submission.form[form_id].present? + + extract_metadata_from_submission + end + + def extract_metadata_from_submission + address = submission.form.dig(form_id, 'veteran', 'currentMailingAddress') + zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? + pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] + pii.merge({ + fileNumber: pii['va_eauth_pnid'], + zipCode: zip || '00000', + claimDate: submission.created_at.iso8601, + formsIncluded: map_form_inclusion + }) + end + + def map_form_inclusion + VALID_VFF_FORMS.select { |type| submission.form[type].present? } + end + + def output_directory_path + @output_directory_path ||= "#{parent_dir}/#{submission.id}" + end + + def user_uploads + @user_uploads ||= submission.form['form_uploads'] + end + + def user_upload_failures + @user_upload_failures ||= [] + end + + def user_upload_path + @user_upload_path ||= "#{output_directory_path}/user_uploads" + end + end end end From 04fc1f0ccfbca8aa7f52148087cf74093010ba97 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 11:30:51 -0400 Subject: [PATCH 12/38] misc tweaks and comments --- .../simple_forms_api/s3_service/archive_submission_to_pdf.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index 9501cbd1c7e..8b398acc384 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -88,6 +88,7 @@ def write_pdf quiet_pdf_failures ? write_pdf_error(e) : raise(e) end + # TODO: update this method to support configurable pdf generation logic def generate_pdf_content service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) service.get_form(form_json.to_json).body['pdf'] @@ -124,6 +125,7 @@ def write_user_uploads def process_user_upload(upload) log_info("Processing upload: #{upload['name']} - #{upload['confirmationCode']}") + # TODO: update this logic in preference of a configurable attachment type local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) raise 'Local record not found' unless local_file @@ -160,6 +162,7 @@ def metadata extract_metadata_from_submission end + # TODO: update this method to support configurable metadata def extract_metadata_from_submission address = submission.form.dig(form_id, 'veteran', 'currentMailingAddress') zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? @@ -172,6 +175,7 @@ def extract_metadata_from_submission }) end + # TODO: update this method to check against configured form list def map_form_inclusion VALID_VFF_FORMS.select { |type| submission.form[type].present? } end From 298792d4b4287c5bd5adc0c97189cefa837eea52 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 12:29:32 -0400 Subject: [PATCH 13/38] more code consolidation --- .../s3_service/archive_submission_to_pdf.rb | 24 +++++++------------ .../s3_service/submission_archive_handler.rb | 22 +++++------------ .../simple_forms_api/s3_service/utils.rb | 13 ++++++++++ 3 files changed, 27 insertions(+), 32 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index 8b398acc384..d2d9994df8a 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -37,12 +37,8 @@ def initialize(form_id: nil, submission_id: nil, submission: nil, **options) @failures = [] @form_id = form_id @submission = submission || FormSubmission.find(submission_id) - @parent_dir = defaults[:parent_dir] - @include_text_archive = defaults[:include_text_archive] - @include_json_archive = defaults[:include_json_archive] - @quiet_upload_failures = defaults[:quiet_upload_failures] - @quiet_pdf_failures = defaults[:quiet_pdf_failures] - @run_quiet = defaults[:run_quiet] + + assign_instance_variables(defaults) end def run @@ -50,7 +46,7 @@ def run process_submission_files output_directory_path rescue => e - handle_run_error(e) + handle_error("Failed submission: #{submission.id}", e, submission_id: submission.id) end private @@ -62,7 +58,7 @@ def default_options parent_dir: 'wipn8923-test', quiet_pdf_failures: true, # skip PDF generation silently quiet_upload_failures: true, # skip problematic uploads silently - run_quiet: true + run_quiet: true # silence but record errors, logged at the end } end @@ -74,13 +70,6 @@ def process_submission_files write_metadata end - def handle_run_error(error) - raise error unless run_quiet - - failures << { id: submission.id, error: error.message } - log_error("Failed submission: #{submission.id}", error) - end - def write_pdf encoded_pdf = generate_pdf_content save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) @@ -124,7 +113,10 @@ def write_user_uploads end def process_user_upload(upload) - log_info("Processing upload: #{upload['name']} - #{upload['confirmationCode']}") + log_info( + "Processing upload: #{upload['name']} - #{upload['confirmationCode']}", + { name: upload['name'], confirmation_code: upload['confirmationCode'] } + ) # TODO: update this logic in preference of a configurable attachment type local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) raise 'Local record not found' unless local_file diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index 594d521f037..765f970686a 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -10,12 +10,9 @@ def initialize(submission_ids:, **options) defaults = default_options.merge(options) @submission_ids = submission_ids - @parent_dir = defaults[:parent_dir] - @bundle_by_user = defaults[:bundle_by_user] - @run_quiet = defaults[:run_quiet] - @quiet_upload_failures = defaults[:quiet_upload_failures] - @quiet_pdf_failures = defaults[:quiet_pdf_failures] @failures = [] + + assign_instance_variables(defaults) end def run @@ -30,8 +27,8 @@ def default_options { bundle_by_user: true, parent_dir: 'wipn8923-test', - quiet_pdf_failures: false, # granular control over how user processing raises errors - quiet_upload_failures: false, # granular control over how user processing raises errors + quiet_pdf_failures: false, # granular control over how pdf processing raises errors + quiet_upload_failures: false, # granular control over how upload processing raises errors run_quiet: true # silence but record errors until the end } end @@ -70,7 +67,7 @@ def process_individual_submissions def process_user_submissions(uuid, submission_ids) UserSubmissionArchiveHandler.new(uuid:, submission_ids:, parent_dir:).run rescue => e - handle_error("User failure: #{uuid}", e, uuid:) + handle_error("User submission archiver failure: #{uuid}", e, uuid:) end def process_submission(submission_id) @@ -81,14 +78,7 @@ def process_submission(submission_id) quiet_upload_failures: ).run rescue => e - handle_error("Submission failure: #{submission_id}", e, submission_id:) - end - - def handle_error(message, error, context) - raise unless run_quiet - - log_error(message, error, context) - failures << { context => error } + handle_error("Submission archiver failure: #{submission_id}", e, submission_id:) end def cleanup_tmp_files diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb index 89378cc0331..fb08ca68e8c 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb @@ -5,6 +5,12 @@ module S3Service class Utils private + def assign_instance_variables(defaults) + defaults.each do |key, value| + instance_variable_set("@#{key}", value) + end + end + def log_info(message, **details) Rails.logger.info(message, details) end @@ -13,6 +19,13 @@ def log_error(message, error, **details) Rails.logger.error(message, details.merge(error: error.message, backtrace: error.backtrace.first(5))) end + def handle_error(message, error, context) + raise error unless run_quiet + + log_error(message, error, context) + failures << { message:, error:, **context } + end + def s3_resource @s3_resource ||= Reports::Uploader.new_s3_resource end From 57b7f2fbe252860059b702676f47aa0d9cb7adea Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 17:16:56 -0400 Subject: [PATCH 14/38] more changes to make VFF forms work --- .../simple_forms_api/v1/uploads_controller.rb | 11 +- .../s3_service/archive_submission_to_pdf.rb | 66 ++++---- .../s3_service/submission_archive_handler.rb | 10 +- .../submission_archive_handler_job.rb | 4 +- .../user_submission_archive_handler.rb | 2 +- .../simple_forms_api/s3_service/utils.rb | 2 +- .../archive_submission_to_pdf_spec.rb | 142 ++++++++++++++++++ 7 files changed, 191 insertions(+), 46 deletions(-) create mode 100644 modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb diff --git a/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb b/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb index 07b3ce599d9..d14ec82bcbb 100644 --- a/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb +++ b/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb @@ -164,9 +164,14 @@ def get_file_paths_and_metadata(parsed_form_data) end def upload_pdf(file_path, metadata, form) - location, uuid = prepare_for_upload(form, file_path) + location, uuid, submission_attempt = prepare_for_upload(form, file_path) log_upload_details(location, uuid) response = perform_pdf_upload(location, file_path, metadata, form) + SimpleFormsApi::S3Service::SubmissionArchiveHandlerJob.perform_async( + submission_ids: [submission_attempt.form_submission.id], + metadata:, + file_path: + ) [response.status, uuid] end @@ -176,9 +181,9 @@ def prepare_for_upload(form, file_path) form_id: get_form_id) location, uuid = lighthouse_service.request_upload stamp_pdf_with_uuid(form, uuid, file_path) - create_form_submission_attempt(uuid) + submission_attempt = create_form_submission_attempt(uuid) - [location, uuid] + [location, uuid, submission_attempt] end def stamp_pdf_with_uuid(form, uuid, stamped_template_path) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index d2d9994df8a..ef594e461e2 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -10,7 +10,7 @@ # 3. search for dsva-vetsgov-prod-reports # 4. search for your parent_dir name, e.g. 526archive_aug_21st_2024 # -# If you do not provide a parent_dir, the script defaults to a folder called wipn8923-test +# If you do not provide a parent_dir, the script defaults to a folder called vff-simple-forms # # OPTION 1: Run the script with user groupings # - requires SubmissionDuplicateReport object @@ -22,7 +22,7 @@ module SimpleFormsApi module S3Service class ArchiveSubmissionToPdf < SimpleFormsApi::S3Service::Utils - attr_reader :failures, :form_id, :include_json_archive, :include_text_archive, + attr_reader :failures, :include_json_archive, :include_text_archive, :metadata, :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, :submission @@ -31,11 +31,10 @@ class ArchiveSubmissionToPdf < SimpleFormsApi::S3Service::Utils 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 ].freeze - def initialize(form_id: nil, submission_id: nil, submission: nil, **options) + def initialize(submission_id: nil, submission: nil, **options) defaults = default_options.merge(options) @failures = [] - @form_id = form_id @submission = submission || FormSubmission.find(submission_id) assign_instance_variables(defaults) @@ -46,19 +45,22 @@ def run process_submission_files output_directory_path rescue => e - handle_error("Failed submission: #{submission.id}", e, submission_id: submission.id) + handle_error("Failed submission: #{submission.id}", e, { submission_id: submission.id }) end private def default_options { + file_path: nil, # file path for the PDF file to be archived include_json_archive: true, # include the form data as a JSON object include_text_archive: true, # include the form data as a text file - parent_dir: 'wipn8923-test', + metadata: {}, + parent_dir: 'vff-simple-forms', quiet_pdf_failures: true, # skip PDF generation silently quiet_upload_failures: true, # skip problematic uploads silently - run_quiet: true # silence but record errors, logged at the end + run_quiet: true, # silence but record errors, logged at the end + uploads_path: ['uploadedFile'] # hierarchy where the attachments can be found } end @@ -72,15 +74,24 @@ def process_submission_files def write_pdf encoded_pdf = generate_pdf_content - save_file_to_s3("#{output_directory_path}/form.pdf", Base64.decode64(encoded_pdf)) + pdf = save_file_to_s3( + "#{output_directory_path}/form_#{submission.form_data['form_number']}.pdf", + Base64.decode64(encoded_pdf) + ) + sign_s3_file_url(pdf) rescue => e quiet_pdf_failures ? write_pdf_error(e) : raise(e) end - # TODO: update this method to support configurable pdf generation logic def generate_pdf_content - service = EVSS::DisabilityCompensationForm::NonBreakeredService.new(submission.auth_headers) - service.get_form(form_json.to_json).body['pdf'] + raise 'Missing PDF file to upload' unless file_path + + Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) + end + + def sign_s3_file_url(pdf) + signed_url = pdf.presigned_url(:get, expires_in: 1.year.to_i) + submission.form_submission_attempts&.last&.update(signed_url:) end def write_pdf_error(error) @@ -135,41 +146,22 @@ def write_failure_report end def save_file_to_s3(path, content) - s3_resource.bucket(target_bucket).object(path).put(body: content) + s3_resource.bucket(target_bucket).object(path).tap do |obj| + obj.put(body: content) + end end def form_json - @form_json ||= JSON.parse(submission.form_json)[form_id] + @form_json ||= JSON.parse(submission.form_data) end def form_text_archive - submission.form.tap do |form| - form[form_id]['claimDate'] ||= submission.created_at.iso8601 - end - end - - def metadata - return {} unless submission.auth_headers.present? && submission.form[form_id].present? - - extract_metadata_from_submission - end - - # TODO: update this method to support configurable metadata - def extract_metadata_from_submission - address = submission.form.dig(form_id, 'veteran', 'currentMailingAddress') - zip = [address['zipFirstFive'], address['zipLastFour']].join('-') if address.present? - pii = JSON.parse(submission.auth_headers['va_eauth_authorization'])['authorizationResponse'] - pii.merge({ - fileNumber: pii['va_eauth_pnid'], - zipCode: zip || '00000', - claimDate: submission.created_at.iso8601, - formsIncluded: map_form_inclusion - }) + submission.form_data['claimDate'] ||= submission.created_at.iso8601 end # TODO: update this method to check against configured form list def map_form_inclusion - VALID_VFF_FORMS.select { |type| submission.form[type].present? } + VALID_VFF_FORMS.select { |type| submission.form_number == type } end def output_directory_path @@ -177,7 +169,7 @@ def output_directory_path end def user_uploads - @user_uploads ||= submission.form['form_uploads'] + @user_uploads ||= submission.fetch(*uploads_path, nil) end def user_upload_failures diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index 765f970686a..7df98891fd1 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -26,7 +26,9 @@ def run def default_options { bundle_by_user: true, - parent_dir: 'wipn8923-test', + file_path: nil, # file path for the PDF file to be archived + metadata: nil, + parent_dir: 'vff-simple-forms', quiet_pdf_failures: false, # granular control over how pdf processing raises errors quiet_upload_failures: false, # granular control over how upload processing raises errors run_quiet: true # silence but record errors until the end @@ -72,10 +74,12 @@ def process_user_submissions(uuid, submission_ids) def process_submission(submission_id) ArchiveSubmissionToPdf.new( - submission_id:, + file_path:, + metadata:, parent_dir:, quiet_pdf_failures:, - quiet_upload_failures: + quiet_upload_failures:, + submission_id: ).run rescue => e handle_error("Submission archiver failure: #{submission_id}", e, submission_id:) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb index 1ea5d043316..60433f9b5ef 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb @@ -22,7 +22,9 @@ def perform(submission_ids:, **options) def default_options { bundle_by_user: true, - parent_dir: 'wipn8923-test', + file_path: nil, # file path for the PDF file to be archived + metadata: nil, + parent_dir: 'vff-simple-forms', quiet_pdf_failures: false, quiet_upload_failures: false, run_quiet: true, diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb index 1b463a5e65a..99256ee8f5f 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb @@ -5,7 +5,7 @@ module S3Service class UserSubmissionArchiveHandler < SimpleFormsApi::S3Service::Utils attr_reader :uuid, :user_dir, :submission_ids - def initialize(uuid:, submission_ids:, parent_dir: 'wipn8923-test') + def initialize(uuid:, submission_ids:, parent_dir: 'vff-simple-forms') @submission_ids = submission_ids @uuid = uuid @user_dir = build_user_directory(parent_dir) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb index fb08ca68e8c..471e4b02add 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb @@ -22,7 +22,7 @@ def log_error(message, error, **details) def handle_error(message, error, context) raise error unless run_quiet - log_error(message, error, context) + log_error(message, error, **context) failures << { message:, error:, **context } end diff --git a/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb b/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb new file mode 100644 index 00000000000..d01ae14e114 --- /dev/null +++ b/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb @@ -0,0 +1,142 @@ +# frozen_string_literal: true + +require 'rails_helper' +require SimpleFormsApi::Engine.root.join('spec', 'spec_helper.rb') + +RSpec.describe SimpleFormsApi::S3Service::ArchiveSubmissionToPdf, type: :model do + let(:submission_id) { 1 } + let(:form_id) { '21-10210' } + let(:form_data) { File.read('modules/simple_forms_api/spec/fixtures/form_json/vba_21_10210.json') } + let(:submission) { create(:form_submission, :pending, form_type: form_id, form_data:) } + let(:options) do + { + include_json_archive: true, + include_text_archive: true, + parent_dir: 'test-dir', + quiet_pdf_failures: true, + quiet_upload_failures: true, + run_quiet: true + } + end + let(:archive_submission) { described_class.new(submission_id:, **options) } + + before do + allow(FormSubmission).to receive(:find).and_return(submission) + end + + describe '#initialize' do + it 'sets default values for instance variables' do + expect(archive_submission.submission).to eq(submission) + expect(archive_submission.parent_dir).to eq('test-dir') + expect(archive_submission.include_json_archive).to be(true) + expect(archive_submission.include_text_archive).to be(true) + expect(archive_submission.quiet_pdf_failures).to be(true) + expect(archive_submission.quiet_upload_failures).to be(true) + end + end + + describe '#run' do + before do + allow(archive_submission).to receive(:process_submission_files) + allow(archive_submission).to receive(:output_directory_path).and_return('/some/path') + allow(archive_submission).to receive(:log_info) + end + + it 'logs the processing of the submission and calls process_submission_files' do + expect(archive_submission).to receive(:log_info).with("Processing submission ID: #{submission.id}") + expect(archive_submission).to receive(:process_submission_files) + archive_submission.run + end + + context 'when an error occurs' do + before do + allow(archive_submission).to receive(:process_submission_files).and_raise(StandardError, 'Processing error') + end + + xit 'handles errors and logs them' do + expect(archive_submission).to( + receive(:handle_error).with( + "Failed submission: #{submission.id}", + instance_of(StandardError), submission_id: submission.id + ) + ) + expect { archive_submission.run }.not_to raise_error + end + end + end + + describe '#write_pdf' do + before do + allow(archive_submission).to receive(:generate_pdf_content).and_return(Base64.encode64('pdf content')) + allow(archive_submission).to receive(:save_file_to_s3) + end + + xit 'writes the PDF to S3' do + expect(archive_submission).to receive(:save_file_to_s3).with(/form.pdf/, 'pdf content') + archive_submission.run + end + + context 'when an error occurs' do + before do + allow(archive_submission).to receive(:generate_pdf_content).and_raise(StandardError, 'PDF generation error') + end + + it 'handles pdf generation errors based on quiet_pdf_failures' do + expect(archive_submission).to receive(:write_pdf_error).with(instance_of(StandardError)) + expect { archive_submission.run }.not_to raise_error + end + end + end + + describe '#write_as_json_archive' do + before do + allow(archive_submission).to receive(:save_file_to_s3) + allow(archive_submission).to receive(:form_json).and_return({ key: 'value' }) + end + + it 'writes the JSON archive to S3' do + expect(archive_submission).to receive(:save_file_to_s3).with(/form_text_archive.json/, + JSON.pretty_generate({ key: 'value' })) + archive_submission.run + end + end + + describe '#write_as_text_archive' do + before do + allow(archive_submission).to receive(:save_file_to_s3) + allow(archive_submission).to receive(:form_text_archive).and_return({ key: 'value' }) + end + + it 'writes the text archive to S3' do + expect(archive_submission).to receive(:save_file_to_s3).with(/form_text_archive.txt/, { key: 'value' }.to_json) + archive_submission.run + end + end + + describe '#write_metadata' do + before do + allow(archive_submission).to receive(:save_file_to_s3) + allow(archive_submission).to receive(:metadata).and_return({ key: 'value' }) + end + + xit 'writes metadata to S3' do + expect(archive_submission).to receive(:save_file_to_s3).with(/metadata.json/, { key: 'value' }.to_json) + archive_submission.run + end + end + + describe '#handle_error' do + before do + allow(archive_submission).to receive(:process_submission_files).and_return(error) + end + + let(:error) { StandardError.new('some error') } + + xit 'logs the error and re-raises it' do + expect(archive_submission).to receive(:log_error).with( + "Failed submission: #{submission.id}", error, submission_id: submission.id + ) + expect { archive_submission.run }.to raise_error(error) + end + end +end From 621e210253e6dd76d26abd1c2d25d0dbb6c7b5eb Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Fri, 6 Sep 2024 17:26:35 -0400 Subject: [PATCH 15/38] remove verbose inheritance --- .../simple_forms_api/s3_service/archive_submission_to_pdf.rb | 2 +- .../simple_forms_api/s3_service/submission_archive_handler.rb | 2 +- .../s3_service/submission_archive_handler_job.rb | 2 +- .../s3_service/user_submission_archive_handler.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index ef594e461e2..252698af86a 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -21,7 +21,7 @@ # this will just put each submission in a folder by it's id under the parent dir module SimpleFormsApi module S3Service - class ArchiveSubmissionToPdf < SimpleFormsApi::S3Service::Utils + class ArchiveSubmissionToPdf < Utils attr_reader :failures, :include_json_archive, :include_text_archive, :metadata, :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, :submission diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index 7df98891fd1..ff1ff349e6c 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -2,7 +2,7 @@ module SimpleFormsApi module S3Service - class SubmissionArchiveHandler < SimpleFormsApi::S3Service::Utils + class SubmissionArchiveHandler < Utils attr_reader :submission_ids, :parent_dir, :successes, :failures, :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb index 60433f9b5ef..51f0572b093 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb @@ -2,7 +2,7 @@ module SimpleFormsApi module S3Service - class SubmissionArchiveHandlerJob < SimpleFormsApi::S3Service::Utils + class SubmissionArchiveHandlerJob < Utils include Sidekiq::Worker sidekiq_options retry: 3, queue: 'default' diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb index 99256ee8f5f..ee513378564 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb @@ -2,7 +2,7 @@ module SimpleFormsApi module S3Service - class UserSubmissionArchiveHandler < SimpleFormsApi::S3Service::Utils + class UserSubmissionArchiveHandler < Utils attr_reader :uuid, :user_dir, :submission_ids def initialize(uuid:, submission_ids:, parent_dir: 'vff-simple-forms') From 79f40154a1bd2d6c1b5632663c422c45c10cd0eb Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 14:13:00 -0400 Subject: [PATCH 16/38] updates in accordance with PR feedback --- .../s3_service/archive_submission_to_pdf.rb | 22 +++++-------------- .../s3_service/submission_archive_handler.rb | 14 ++++-------- .../submission_archive_handler_job.rb | 9 +++----- .../user_submission_archive_handler.rb | 5 ++--- .../simple_forms_api/s3_service/utils.rb | 4 +--- 5 files changed, 15 insertions(+), 39 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb index 252698af86a..e44d81e16fa 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb @@ -22,16 +22,14 @@ module SimpleFormsApi module S3Service class ArchiveSubmissionToPdf < Utils - attr_reader :failures, :include_json_archive, :include_text_archive, :metadata, - :parent_dir, :quiet_pdf_failures, :quiet_upload_failures, :run_quiet, - :submission + attr_reader :failures, :include_json_archive, :include_text_archive, :metadata, :parent_dir, :submission VALID_VFF_FORMS = %w[ 20-10206 20-10207 21-0845 21-0966 21-0972 21-10210 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 ].freeze - def initialize(submission_id: nil, submission: nil, **options) + def initialize(submission_id: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) @failures = [] @@ -55,11 +53,8 @@ def default_options file_path: nil, # file path for the PDF file to be archived include_json_archive: true, # include the form data as a JSON object include_text_archive: true, # include the form data as a text file - metadata: {}, - parent_dir: 'vff-simple-forms', - quiet_pdf_failures: true, # skip PDF generation silently - quiet_upload_failures: true, # skip problematic uploads silently - run_quiet: true, # silence but record errors, logged at the end + metadata: {}, # pertinent metadata for original file upload/submission + parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live uploads_path: ['uploadedFile'] # hierarchy where the attachments can be found } end @@ -79,8 +74,6 @@ def write_pdf Base64.decode64(encoded_pdf) ) sign_s3_file_url(pdf) - rescue => e - quiet_pdf_failures ? write_pdf_error(e) : raise(e) end def generate_pdf_content @@ -90,15 +83,10 @@ def generate_pdf_content end def sign_s3_file_url(pdf) - signed_url = pdf.presigned_url(:get, expires_in: 1.year.to_i) + signed_url = pdf.presigned_url(:get, expires_in: 30.minutes.to_i) submission.form_submission_attempts&.last&.update(signed_url:) end - def write_pdf_error(error) - log_error("PDF generation failed for submission: #{submission.id}", error) - save_file_to_s3("#{output_directory_path}/pdf_generating_failure.txt", error_details(error)) - end - def error_details(error) "#{error.message}\n\n#{error.backtrace.join("\n")}" end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index ff1ff349e6c..f4f38409144 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -3,10 +3,9 @@ module SimpleFormsApi module S3Service class SubmissionArchiveHandler < Utils - attr_reader :submission_ids, :parent_dir, :successes, :failures, - :bundle_by_user, :run_quiet, :quiet_upload_failures, :quiet_pdf_failures + attr_reader :submission_ids, :parent_dir, :successes, :failures, :bundle_by_user - def initialize(submission_ids:, **options) + def initialize(submission_ids:, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) @submission_ids = submission_ids @@ -27,11 +26,8 @@ def default_options { bundle_by_user: true, file_path: nil, # file path for the PDF file to be archived - metadata: nil, - parent_dir: 'vff-simple-forms', - quiet_pdf_failures: false, # granular control over how pdf processing raises errors - quiet_upload_failures: false, # granular control over how upload processing raises errors - run_quiet: true # silence but record errors until the end + metadata: {}, # pertinent metadata for original file upload/submission + parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live } end @@ -77,8 +73,6 @@ def process_submission(submission_id) file_path:, metadata:, parent_dir:, - quiet_pdf_failures:, - quiet_upload_failures:, submission_id: ).run rescue => e diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb index 51f0572b093..535f19e86fa 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb @@ -23,12 +23,9 @@ def default_options { bundle_by_user: true, file_path: nil, # file path for the PDF file to be archived - metadata: nil, - parent_dir: 'vff-simple-forms', - quiet_pdf_failures: false, - quiet_upload_failures: false, - run_quiet: true, - signed_link: false + metadata: {}, # pertinent metadata for original file upload/submission + parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live + signed_link: false # TODO: Will we ever need to make this optional? } end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb index ee513378564..e02283765ad 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb @@ -5,7 +5,7 @@ module S3Service class UserSubmissionArchiveHandler < Utils attr_reader :uuid, :user_dir, :submission_ids - def initialize(uuid:, submission_ids:, parent_dir: 'vff-simple-forms') + def initialize(uuid:, submission_ids:, parent_dir: 'vff-simple-forms') # rubocop:disable Lint/MissingSuper @submission_ids = submission_ids @uuid = uuid @user_dir = build_user_directory(parent_dir) @@ -17,8 +17,7 @@ def run log_info("Archive completed for user: #{uuid}") user_dir rescue => e - log_error("Error in archive process for user: #{uuid}", e) - raise e + handle_error("Error in archive process for user: #{uuid}", e) end private diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb index 471e4b02add..b7f576bad02 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb @@ -20,10 +20,8 @@ def log_error(message, error, **details) end def handle_error(message, error, context) - raise error unless run_quiet - log_error(message, error, **context) - failures << { message:, error:, **context } + raise error end def s3_resource From fd3c32455ad192af40bdb70a66089cd953b053cd Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 14:23:15 -0400 Subject: [PATCH 17/38] more misc changes --- .../s3_service/submission_archive_handler.rb | 7 +------ .../s3_service/submission_archive_handler_job.rb | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index f4f38409144..6f92fbb63ae 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -69,12 +69,7 @@ def process_user_submissions(uuid, submission_ids) end def process_submission(submission_id) - ArchiveSubmissionToPdf.new( - file_path:, - metadata:, - parent_dir:, - submission_id: - ).run + ArchiveSubmissionToPdf.new(file_path:, metadata:, parent_dir:, submission_id:).run rescue => e handle_error("Submission archiver failure: #{submission_id}", e, submission_id:) end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb index 535f19e86fa..9d029067050 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb @@ -14,7 +14,7 @@ def perform(submission_ids:, **options) result_dir = runner.run log_info("Job completed successfully. Results saved in directory: #{result_dir}") rescue => e - handle_job_error(e) + handle_error('SubmissionArchiveHandlerJob failed.', e) end private @@ -28,11 +28,6 @@ def default_options signed_link: false # TODO: Will we ever need to make this optional? } end - - def handle_job_error(error) - log_error('SubmissionArchiveHandlerJob failed.', error) - raise error - end end end end From 814f2e896df0eb3a32c7cba0694a0bf841d5e511 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 16:04:17 -0400 Subject: [PATCH 18/38] many changes, stripping unnecessary logic --- .../jobs/submission_archive_handler_job.rb | 36 +++++++ .../s3_service/submission_archive_handler.rb | 49 +++------- .../submission_archive_handler_job.rb | 33 ------- ...ssion_to_pdf.rb => submission_archiver.rb} | 93 +++++++++---------- .../user_submission_archive_handler.rb | 53 ----------- .../archive_submission_to_pdf_spec.rb | 2 +- 6 files changed, 94 insertions(+), 172 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb delete mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb rename modules/simple_forms_api/app/services/simple_forms_api/s3_service/{archive_submission_to_pdf.rb => submission_archiver.rb} (62%) delete mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb new file mode 100644 index 00000000000..2a053a86626 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +module SimpleFormsApi + module S3Service + module Jobs + class SubmissionArchiveHandlerJob < SimpleFormsApi::S3Service::Utils + include Sidekiq::Worker + + sidekiq_options retry: 3, queue: 'default' + + def perform(benefits_intake_uuids:, **options) + defaults = default_options.merge(options) + + runner = SubmissionArchiveHandler.new(benefits_intake_uuids:, **defaults) + result_dir = runner.run + log_info("Job completed successfully. Results saved in directory: #{result_dir}") + rescue => e + handle_error('SubmissionArchiveHandlerJob failed.', e) + end + + private + + def default_options + { + attachments: [], + bundle_by_user: true, + file_path: nil, # file path for the PDF file to be archived + metadata: {}, # pertinent metadata for original file upload/submission + parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live + signed_link: false # TODO: Will we ever need to make this optional? + } + end + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index 6f92fbb63ae..6e8aecc6d40 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -3,19 +3,19 @@ module SimpleFormsApi module S3Service class SubmissionArchiveHandler < Utils - attr_reader :submission_ids, :parent_dir, :successes, :failures, :bundle_by_user + attr_reader :attachments, :benefits_intake_uuids, :parent_dir, :successes, :failures, :bundle_by_user - def initialize(submission_ids:, **options) # rubocop:disable Lint/MissingSuper + def initialize(benefits_intake_uuids: [], **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) - @submission_ids = submission_ids + @benefits_intake_uuids = benefits_intake_uuids @failures = [] assign_instance_variables(defaults) end def run - bundle_by_user ? process_by_user : process_individual_submissions + process_individual_submissions cleanup_tmp_files parent_dir end @@ -24,6 +24,7 @@ def run def default_options { + attachments: [], bundle_by_user: true, file_path: nil, # file path for the PDF file to be archived metadata: {}, # pertinent metadata for original file upload/submission @@ -32,46 +33,22 @@ def default_options end def submissions - @submissions ||= FormSubmission.where(id: submission_ids) - end - - def submissions_by_uuid - @submissions_by_uuid ||= group_submissions_by_uuid - end - - def group_submissions_by_uuid - submissions.group_by(&:user_uuid).transform_values do |user_submissions| - user_submissions.map(&:id) - end - end - - def process_by_user - submissions_by_uuid.each do |uuid, submission_ids| - log_info("Processing for user: #{uuid} with #{submission_ids.size} submission(s)", uuid:, submission_ids:) - process_user_submissions(uuid, submission_ids) - end + @submissions ||= FormSubmission.where(benefits_intake_uuid: benefits_intake_uuids) end def process_individual_submissions submissions.each_with_index do |sub, idx| - log_info( - "Processing submission: #{sub.id} (non-grouped) ##{idx + 1} of #{submissions.count} total submissions", - submission_id: sub.id, submission_count: submissions.count - ) - process_submission(sub.id) + message = "Processing submission: #{sub.benefits_intake_uuid} (non-grouped)" \ + "##{idx + 1} of #{submissions.count} total submissions" + log_info(message, benefits_intake_uuid: sub.benefits_intake_uuid, submission_count: submissions.count) + process_submission(sub.benefits_intake_uuid) end end - def process_user_submissions(uuid, submission_ids) - UserSubmissionArchiveHandler.new(uuid:, submission_ids:, parent_dir:).run - rescue => e - handle_error("User submission archiver failure: #{uuid}", e, uuid:) - end - - def process_submission(submission_id) - ArchiveSubmissionToPdf.new(file_path:, metadata:, parent_dir:, submission_id:).run + def process_submission(benefits_intake_uuid) + SubmissionArchiver.new(attachments:, file_path:, metadata:, parent_dir:, benefits_intake_uuid:).run rescue => e - handle_error("Submission archiver failure: #{submission_id}", e, submission_id:) + handle_error("Submission archiver failure: #{benefits_intake_uuid}", e, benefits_intake_uuid:) end def cleanup_tmp_files diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb deleted file mode 100644 index 9d029067050..00000000000 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler_job.rb +++ /dev/null @@ -1,33 +0,0 @@ -# frozen_string_literal: true - -module SimpleFormsApi - module S3Service - class SubmissionArchiveHandlerJob < Utils - include Sidekiq::Worker - - sidekiq_options retry: 3, queue: 'default' - - def perform(submission_ids:, **options) - defaults = default_options.merge(options) - - runner = SubmissionArchiveHandler.new(submission_ids:, **defaults) - result_dir = runner.run - log_info("Job completed successfully. Results saved in directory: #{result_dir}") - rescue => e - handle_error('SubmissionArchiveHandlerJob failed.', e) - end - - private - - def default_options - { - bundle_by_user: true, - file_path: nil, # file path for the PDF file to be archived - metadata: {}, # pertinent metadata for original file upload/submission - parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live - signed_link: false # TODO: Will we ever need to make this optional? - } - end - end - end -end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb similarity index 62% rename from modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb index e44d81e16fa..844b315ffea 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/archive_submission_to_pdf.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb @@ -17,23 +17,27 @@ # - SubmissionArchiveHandler.new(submission_ids: ids, parent_dir:).run # # OPTION 2: Run without user groupings -# ids.each { |id| ArchiveSubmissionToPdf.new(submission_id: id, parent_dir:).run } +# ids.each { |id| SubmissionArchiver.new(submission_id: id, parent_dir:).run } # this will just put each submission in a folder by it's id under the parent dir module SimpleFormsApi module S3Service - class ArchiveSubmissionToPdf < Utils - attr_reader :failures, :include_json_archive, :include_text_archive, :metadata, :parent_dir, :submission - - VALID_VFF_FORMS = %w[ - 20-10206 20-10207 21-0845 21-0966 21-0972 21-10210 - 21-4138 21-4142 21P-0847 26-4555 40-0247 40-10007 - ].freeze + class SubmissionArchiver < Utils + attr_reader :benefits_intake_uuid, :failures, :include_json_archive, :include_text_archive, :metadata, + :parent_dir, :submission + + class << self + def fetch_presigned_url(benefits_intake_uuid) + instance = self.class.new(benefits_intake_uuid:) + instance.fetch_pdf(benefits_intake_uuid, form_number) + # return presigned_url from object + end + end - def initialize(submission_id: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper + def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) @failures = [] - @submission = submission || FormSubmission.find(submission_id) + @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) assign_instance_variables(defaults) end @@ -43,7 +47,7 @@ def run process_submission_files output_directory_path rescue => e - handle_error("Failed submission: #{submission.id}", e, { submission_id: submission.id }) + handle_error("Failed submission: #{submission.id}", e, { submission_id: submission.id, benefits_intake_uuid: }) end private @@ -63,7 +67,7 @@ def process_submission_files write_pdf write_as_json_archive if include_json_archive write_as_text_archive if include_text_archive - write_user_uploads if user_uploads.present? + write_attachments if attachments.present? write_metadata end @@ -82,9 +86,15 @@ def generate_pdf_content Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) end + def fetch_pdf + path = "#{output_directory_path}/form_#{submission.form_data['form_number']}.pdf" + s3_resource.bucket(target_bucket).object(path) + end + def sign_s3_file_url(pdf) signed_url = pdf.presigned_url(:get, expires_in: 30.minutes.to_i) - submission.form_submission_attempts&.last&.update(signed_url:) + # TODO: How do we want to handle this? + # submission.form_submission_attempts&.last&.update(signed_url:) end def error_details(error) @@ -92,10 +102,12 @@ def error_details(error) end def write_as_json_archive + form_json = JSON.parse(submission.form_data) save_file_to_s3("#{output_directory_path}/form_text_archive.json", JSON.pretty_generate(form_json)) end def write_as_text_archive + form_text_archive = submission.form_data['claimDate'] ||= submission.created_at.iso8601 save_file_to_s3("#{output_directory_path}/form_text_archive.txt", form_text_archive.to_json) end @@ -103,34 +115,34 @@ def write_metadata save_file_to_s3("#{output_directory_path}/metadata.json", metadata.to_json) end - def write_user_uploads - log_info("Moving #{user_uploads.count} user uploads") - user_uploads.each { |upload| process_user_upload(upload) } - write_failure_report if user_upload_failures.present? + def write_attachments + log_info("Moving #{attachments.count} user uploads") + attachments.each { |upload| process_attachment(upload) } + write_attachment_failure_report if attachment_failures.present? rescue => e handle_upload_error(e) end - def process_user_upload(upload) - log_info( - "Processing upload: #{upload['name']} - #{upload['confirmationCode']}", - { name: upload['name'], confirmation_code: upload['confirmationCode'] } - ) - # TODO: update this logic in preference of a configurable attachment type - local_file = SupportingEvidenceAttachment.find_by(guid: upload['confirmationCode']) + def process_attachment(attachment) + log_info("Processing attachment: #{attachment}") + local_file = PersistentAttachment.find_by(guid: attachment) raise 'Local record not found' unless local_file copy_file_between_buckets(local_file) + rescue => e + attachment_failures << e + handle_error('Attachment failure.', e) + raise e end def copy_file_between_buckets(local_file) source_obj = s3_resource.bucket(local_file.get_file.uploader.aws_bucket).object(local_file.get_file.path) - target_obj = s3_resource.bucket(target_bucket).object("#{user_upload_path}/#{local_file.get_file.filename}") + target_obj = s3_resource.bucket(target_bucket).object("#{attachment_path}/#{local_file.get_file.filename}") target_obj.copy_from(source_obj) end - def write_failure_report - save_file_to_s3("#{output_directory_path}/user_upload_failures.txt", JSON.pretty_generate(user_upload_failures)) + def write_attachment_failure_report + save_file_to_s3("#{output_directory_path}/attachment_failures.txt", JSON.pretty_generate(attachment_failures)) end def save_file_to_s3(path, content) @@ -139,33 +151,16 @@ def save_file_to_s3(path, content) end end - def form_json - @form_json ||= JSON.parse(submission.form_data) - end - - def form_text_archive - submission.form_data['claimDate'] ||= submission.created_at.iso8601 - end - - # TODO: update this method to check against configured form list - def map_form_inclusion - VALID_VFF_FORMS.select { |type| submission.form_number == type } - end - def output_directory_path - @output_directory_path ||= "#{parent_dir}/#{submission.id}" - end - - def user_uploads - @user_uploads ||= submission.fetch(*uploads_path, nil) + @output_directory_path ||= "#{parent_dir}/#{benefits_intake_uuid}" end - def user_upload_failures - @user_upload_failures ||= [] + def attachment_failures + @attachment_failures ||= [] end - def user_upload_path - @user_upload_path ||= "#{output_directory_path}/user_uploads" + def attachment_path + @attachment_path ||= "#{output_directory_path}/attachments" end end end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb deleted file mode 100644 index e02283765ad..00000000000 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/user_submission_archive_handler.rb +++ /dev/null @@ -1,53 +0,0 @@ -# frozen_string_literal: true - -module SimpleFormsApi - module S3Service - class UserSubmissionArchiveHandler < Utils - attr_reader :uuid, :user_dir, :submission_ids - - def initialize(uuid:, submission_ids:, parent_dir: 'vff-simple-forms') # rubocop:disable Lint/MissingSuper - @submission_ids = submission_ids - @uuid = uuid - @user_dir = build_user_directory(parent_dir) - end - - def run - log_info("Starting archive for user: #{uuid}, Submissions: #{submission_ids}") - write_user_submissions - log_info("Archive completed for user: #{uuid}") - user_dir - rescue => e - handle_error("Error in archive process for user: #{uuid}", e) - end - - private - - def build_user_directory(parent_dir) - "#{parent_dir}/#{uuid}" - end - - def write_user_submissions - submissions.each do |submission| - archive_submission(submission) - rescue => e - log_error("Failed to archive submission: #{submission.id} for user: #{uuid}", e) - end - end - - def archive_submission(submission) - log_info("Processing submission: #{submission.id}") - ArchiveSubmissionToPdf.new(submission:, parent_dir: user_dir).run - end - - def submissions - @submissions ||= fetch_submissions - end - - def fetch_submissions - FormSubmission.where(id: submission_ids).tap do |subs| - log_info("Fetched #{subs.count} submissions for user: #{uuid}") - end - end - end - end -end diff --git a/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb b/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb index d01ae14e114..27bf435b79f 100644 --- a/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb +++ b/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb @@ -3,7 +3,7 @@ require 'rails_helper' require SimpleFormsApi::Engine.root.join('spec', 'spec_helper.rb') -RSpec.describe SimpleFormsApi::S3Service::ArchiveSubmissionToPdf, type: :model do +RSpec.describe SimpleFormsApi::S3Service::SubmissionArchiver, type: :model do let(:submission_id) { 1 } let(:form_id) { '21-10210' } let(:form_data) { File.read('modules/simple_forms_api/spec/fixtures/form_json/vba_21_10210.json') } From 45935832b770679f65aeab4ac0843f7821d1586a Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 16:34:00 -0400 Subject: [PATCH 19/38] misc tweaks --- .../s3_service/submission_archiver.rb | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb index 844b315ffea..69452294d25 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb @@ -22,21 +22,20 @@ module SimpleFormsApi module S3Service class SubmissionArchiver < Utils - attr_reader :benefits_intake_uuid, :failures, :include_json_archive, :include_text_archive, :metadata, - :parent_dir, :submission + attr_reader :benefits_intake_uuid, :include_json_archive, :include_text_archive, :metadata, :parent_dir, + :submission class << self def fetch_presigned_url(benefits_intake_uuid) instance = self.class.new(benefits_intake_uuid:) instance.fetch_pdf(benefits_intake_uuid, form_number) - # return presigned_url from object + # TODO: return presigned_url from object end end def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) - @failures = [] @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) assign_instance_variables(defaults) @@ -54,12 +53,12 @@ def run def default_options { + attachments: [], # an array of attachment confirmation codes file_path: nil, # file path for the PDF file to be archived include_json_archive: true, # include the form data as a JSON object include_text_archive: true, # include the form data as a text file metadata: {}, # pertinent metadata for original file upload/submission - parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live - uploads_path: ['uploadedFile'] # hierarchy where the attachments can be found + parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live } end @@ -116,7 +115,7 @@ def write_metadata end def write_attachments - log_info("Moving #{attachments.count} user uploads") + log_info("Moving #{attachments.count} attachments") attachments.each { |upload| process_attachment(upload) } write_attachment_failure_report if attachment_failures.present? rescue => e From 3788cdf69af759b92c7148de5f1d417d274b12ce Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 16:34:36 -0400 Subject: [PATCH 20/38] rip off debt management center sharepoint service --- .../share_point_service/service.rb | 245 ++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb new file mode 100644 index 00000000000..effa2e23ae9 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb @@ -0,0 +1,245 @@ +# frozen_string_literal: true + +require 'faraday/multipart' + +module SimpleFormsApi + module SharePointService + class Service + extend Forwardable + include Common::Client::Concerns::Monitoring + + class ListItemNotFound < StandardError; end + + STATSD_KEY_PREFIX = 'api.vha.financial_status_report.sharepoint.request' + + attr_reader :settings + attr_accessor :access_token, :user + + def_delegators :settings, :sharepoint_url, :client_id, :client_secret, :tenant_id, :resource, :service_name, + :base_path, :authentication_url + + def initialize + @settings = initialize_settings + @access_token = set_sharepoint_access_token + end + + ## + # Upload a PDF file of 5655 form to VHA SharePoint + # + # @param form_contents [Hash] - The JSON of the form + # @param form_submission [Form5655Submission] - Persisted submission of the form + # @param station_id [String] - The VHA station the form belongs to + # + # @return [Faraday::Response] - Response from SharePoint upload + # + def upload(form_contents:, form_submission:, station_id:) + @user = set_user_data(form_submission.user_account_id) + upload_response = upload_pdf(form_contents:, form_submission:, + station_id:) + + list_item_id = get_pdf_list_item_id(upload_response) + + resp = update_list_item_fields(list_item_id:, form_submission:, station_id:) + if resp.success? + StatsD.increment("#{STATSD_KEY_PREFIX}.success") + else + StatsD.increment("#{STATSD_KEY_PREFIX}.failure") + end + resp + rescue => e + StatsD.increment("#{STATSD_KEY_PREFIX}.failure") + Rails.logger.error('Sharepoint Upload failed', e.message) + raise e + end + + private + + ## + # Set the access token for SharePoint authentication from Microsoft Access Control + # + # @return [String] - The access token + # + def set_sharepoint_access_token + auth_response = auth_connection.post("/#{tenant_id}/tokens/OAuth/2", { + grant_type: 'client_credentials', + client_id: "#{client_id}@#{tenant_id}", + client_secret:, + resource: "#{resource}/#{sharepoint_url}@#{tenant_id}" + }) + + auth_response.body['access_token'] + end + + def set_user_data(user_account_id) + user_account = UserAccount.find(user_account_id) + user_profile = mpi_service.find_profile_by_identifier(identifier: user_account.icn, + identifier_type: MPI::Constants::ICN) + + { + ssn: user_profile.profile.ssn, + first_name: user_profile.profile.given_names.first, + last_name: user_profile.profile.family_name + } + end + + ## + # Upload PDF document to SharePoint site + # + # @param form_contents [Hash] - Contents to fill form with + # @param form_submission [Form5655Submission] - Persisted form + # @param station_id [String] - VHA Station identifier + # + # @return [Faraday::Response] + # + def upload_pdf(form_contents:, form_submission:, station_id:) + pdf_path = PdfFill::Filler.fill_ancillary_form(form_contents, "#{form_submission.id}-#{station_id}", '5655') + fsr_pdf = File.open(pdf_path) + + file_name = "#{DateTime.now.strftime('%Y%m%dT%H%M%S')}_#{user[:ssn].last(4)}_#{user[:last_name].tr(' ', '_')}" + + file_transfer_path = + "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')" \ + "/Files/add(url='#{file_name}.pdf',overwrite=true)" + + with_monitoring do + response = sharepoint_file_connection.post(file_transfer_path) do |req| + req.headers['Content-Type'] = 'octet/stream' + req.headers['Content-Length'] = fsr_pdf.size.to_s + req.body = Faraday::UploadIO.new(fsr_pdf, 'octet/stream') + end + + File.delete(pdf_path) + + response + end + end + + ## + # Get the ID of the uploaded document's list item + # + # @param pdf_upload_response [Faraday::Response] - Network response from initial upload + # + # @return [Number] + # + def get_pdf_list_item_id(pdf_upload_response) + uri = pdf_upload_response.body['d']['ListItemAllFields']['__deferred']['uri'] + path = uri.slice(uri.index(base_path)..-1) + + with_monitoring do + get_item_response = sharepoint_connection.get(path) + + list_item_id = get_item_response.body.dig('d', 'ID') + raise ListItemNotFound if list_item_id.nil? + + list_item_id + end + end + + ## + # Populate data columns with properties needed by VHA + # + # @param list_item_id[Number] - ID of SharePoint list item + # @param form_submission [Form5655Submission] - Persisted form + # @param station_id [String] - VHA Station identifier + # + # @return [Faraday::Response] + # + def update_list_item_fields(list_item_id:, form_submission:, station_id:) + path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" + with_monitoring do + sharepoint_connection.post(path) do |req| + req.headers['Content-Type'] = 'application/json;odata=verbose' + req.headers['X-HTTP-METHOD'] = 'MERGE' + req.headers['If-Match'] = '*' + req.body = { + '__metadata' => { + 'type' => 'SP.Data.SubmissionsItem' + }, + 'StationId' => station_id, + 'UID' => form_submission.id, + 'SSN' => user[:ssn], + 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" + }.to_json + end + end + end + + def auth_connection + Faraday.new(url: authentication_url, headers: auth_headers) do |conn| + conn.request :url_encoded + conn.use :breakers + conn.use Faraday::Response::RaiseError + conn.response :raise_custom_error, error_prefix: service_name + conn.response :json + conn.response :betamocks if mock_enabled? + conn.adapter Faraday.default_adapter + end + end + + def sharepoint_connection + Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| + conn.request :json + conn.use :breakers + conn.use Faraday::Response::RaiseError + conn.response :raise_custom_error, error_prefix: service_name + conn.response :json + conn.response :betamocks if mock_enabled? + conn.adapter Faraday.default_adapter + end + end + + def sharepoint_file_connection + Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| + conn.request :multipart + conn.request :url_encoded + conn.use :breakers + conn.use Faraday::Response::RaiseError + conn.response :raise_custom_error, error_prefix: service_name + conn.response :json + conn.response :betamocks if mock_enabled? + conn.adapter Faraday.default_adapter + end + end + + ## + # HTTP headers for Microsoft Access Control authentication + # + # @return [Hash] + # + def auth_headers + { + 'Content-Type' => 'application/x-www-form-urlencoded' + } + end + + ## + # HTTP headers for uploading documents to SharePoint + # + # @return [Hash] + # + def sharepoint_headers + { + 'Authorization' => "Bearer #{access_token}", + 'Accept' => 'application/json;odata=verbose' + } + end + + def initialize_settings + @settings = Settings.vha.sharepoint + end + + def mpi_service + @service ||= MPI::Service.new + end + + ## + # Betamocks enabled status from settings + # + # @return [Boolean] + # + def mock_enabled? + settings.mock || false + end + end + end +end From 146ca82f2f2f2bc2b7a581380beb79382f77c16d Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 16:45:26 -0400 Subject: [PATCH 21/38] first pass at code refinement and exploration --- .../share_point_service/service.rb | 199 ++++++++++-------- 1 file changed, 109 insertions(+), 90 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb index effa2e23ae9..ee1835db1ce 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb @@ -24,7 +24,7 @@ def initialize end ## - # Upload a PDF file of 5655 form to VHA SharePoint + # Upload a PDF file to VHA SharePoint # # @param form_contents [Hash] - The JSON of the form # @param form_submission [Form5655Submission] - Persisted submission of the form @@ -33,23 +33,13 @@ def initialize # @return [Faraday::Response] - Response from SharePoint upload # def upload(form_contents:, form_submission:, station_id:) - @user = set_user_data(form_submission.user_account_id) - upload_response = upload_pdf(form_contents:, form_submission:, - station_id:) + set_user_data(form_submission.user_account_id) + upload_response = upload_pdf(form_contents:, form_submission:, station_id:) + list_item_id = fetch_list_item_id(upload_response) - list_item_id = get_pdf_list_item_id(upload_response) - - resp = update_list_item_fields(list_item_id:, form_submission:, station_id:) - if resp.success? - StatsD.increment("#{STATSD_KEY_PREFIX}.success") - else - StatsD.increment("#{STATSD_KEY_PREFIX}.failure") - end - resp + update_sharepoint_item(list_item_id:, form_submission:, station_id:) rescue => e - StatsD.increment("#{STATSD_KEY_PREFIX}.failure") - Rails.logger.error('Sharepoint Upload failed', e.message) - raise e + handle_upload_error(e) end private @@ -60,25 +50,35 @@ def upload(form_contents:, form_submission:, station_id:) # @return [String] - The access token # def set_sharepoint_access_token - auth_response = auth_connection.post("/#{tenant_id}/tokens/OAuth/2", { - grant_type: 'client_credentials', - client_id: "#{client_id}@#{tenant_id}", - client_secret:, - resource: "#{resource}/#{sharepoint_url}@#{tenant_id}" - }) - + auth_response = auth_connection.post("/#{tenant_id}/tokens/OAuth/2", auth_params) auth_response.body['access_token'] end + def auth_params + { + grant_type: 'client_credentials', + client_id: "#{client_id}@#{tenant_id}", + client_secret:, + resource: "#{resource}/#{sharepoint_url}@#{tenant_id}" + } + end + def set_user_data(user_account_id) user_account = UserAccount.find(user_account_id) - user_profile = mpi_service.find_profile_by_identifier(identifier: user_account.icn, - identifier_type: MPI::Constants::ICN) + user_profile = fetch_user_profile(user_account.icn) + @user = extract_user_info(user_profile) + end + # TODO: what is MPI service and can we use it? + def fetch_user_profile(icn) + mpi_service.find_profile_by_identifier(identifier: icn, identifier_type: MPI::Constants::ICN) + end + + def extract_user_info(profile) { - ssn: user_profile.profile.ssn, - first_name: user_profile.profile.given_names.first, - last_name: user_profile.profile.family_name + ssn: profile.ssn, + first_name: profile.given_names.first, + last_name: profile.family_name } end @@ -92,43 +92,57 @@ def set_user_data(user_account_id) # @return [Faraday::Response] # def upload_pdf(form_contents:, form_submission:, station_id:) - pdf_path = PdfFill::Filler.fill_ancillary_form(form_contents, "#{form_submission.id}-#{station_id}", '5655') - fsr_pdf = File.open(pdf_path) + pdf_path = generate_pdf_path(form_contents, form_submission, station_id) + file_name = build_file_name(user) - file_name = "#{DateTime.now.strftime('%Y%m%dT%H%M%S')}_#{user[:ssn].last(4)}_#{user[:last_name].tr(' ', '_')}" + upload_to_sharepoint(pdf_path, file_name) + ensure + File.delete(pdf_path) if pdf_path + end - file_transfer_path = - "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')" \ - "/Files/add(url='#{file_name}.pdf',overwrite=true)" + # TODO: this needs to change + def generate_pdf_path(form_contents, form_submission, station_id) + PdfFill::Filler.fill_ancillary_form(form_contents, "#{form_submission.id}-#{station_id}", '5655') + end + + def build_file_name(user) + "#{DateTime.now.strftime('%Y%m%dT%H%M%S')}_#{user[:ssn].last(4)}_#{user[:last_name].tr(' ', '_')}" + end + def upload_to_sharepoint(pdf_path, file_name) with_monitoring do - response = sharepoint_file_connection.post(file_transfer_path) do |req| + sharepoint_file_connection.post(file_transfer_url(file_name)) do |req| req.headers['Content-Type'] = 'octet/stream' - req.headers['Content-Length'] = fsr_pdf.size.to_s - req.body = Faraday::UploadIO.new(fsr_pdf, 'octet/stream') + req.body = Faraday::UploadIO.new(File.open(pdf_path), 'octet/stream') end - - File.delete(pdf_path) - - response end end + def file_transfer_url(file_name) + "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/Files/add(url='#{file_name}.pdf',overwrite=true)" + end + ## # Get the ID of the uploaded document's list item # # @param pdf_upload_response [Faraday::Response] - Network response from initial upload # - # @return [Number] + # @return [Integer] # - def get_pdf_list_item_id(pdf_upload_response) - uri = pdf_upload_response.body['d']['ListItemAllFields']['__deferred']['uri'] - path = uri.slice(uri.index(base_path)..-1) + def fetch_list_item_id(pdf_upload_response) + list_item_uri = extract_list_item_uri(pdf_upload_response) + retrieve_list_item_id(list_item_uri) + end - with_monitoring do - get_item_response = sharepoint_connection.get(path) + def extract_list_item_uri(response) + response.body['d']['ListItemAllFields']['__deferred']['uri'] + end - list_item_id = get_item_response.body.dig('d', 'ID') + def retrieve_list_item_id(uri) + path = uri.slice(uri.index(base_path)..-1) + with_monitoring do + response = sharepoint_connection.get(path) + list_item_id = response.body.dig('d', 'ID') raise ListItemNotFound if list_item_id.nil? list_item_id @@ -136,80 +150,84 @@ def get_pdf_list_item_id(pdf_upload_response) end ## - # Populate data columns with properties needed by VHA + # Populate SharePoint list item fields with VHA data # - # @param list_item_id[Number] - ID of SharePoint list item + # @param list_item_id [Integer] - ID of SharePoint list item # @param form_submission [Form5655Submission] - Persisted form # @param station_id [String] - VHA Station identifier # # @return [Faraday::Response] # - def update_list_item_fields(list_item_id:, form_submission:, station_id:) + def update_sharepoint_item(list_item_id:, form_submission:, station_id:) path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" with_monitoring do sharepoint_connection.post(path) do |req| req.headers['Content-Type'] = 'application/json;odata=verbose' req.headers['X-HTTP-METHOD'] = 'MERGE' req.headers['If-Match'] = '*' - req.body = { - '__metadata' => { - 'type' => 'SP.Data.SubmissionsItem' - }, - 'StationId' => station_id, - 'UID' => form_submission.id, - 'SSN' => user[:ssn], - 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" - }.to_json + req.body = build_item_payload(form_submission, station_id).to_json end end end + def build_item_payload(form_submission, station_id) + { + '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, + 'StationId' => station_id, + 'UID' => form_submission.id, + 'SSN' => user[:ssn], + 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" + } + end + + def handle_upload_error(error) + StatsD.increment("#{STATSD_KEY_PREFIX}.failure") + Rails.logger.error('SharePoint upload failed', error.message) + raise error + end + def auth_connection - Faraday.new(url: authentication_url, headers: auth_headers) do |conn| - conn.request :url_encoded - conn.use :breakers - conn.use Faraday::Response::RaiseError - conn.response :raise_custom_error, error_prefix: service_name - conn.response :json - conn.response :betamocks if mock_enabled? - conn.adapter Faraday.default_adapter + @auth_connection ||= Faraday.new(url: authentication_url, headers: auth_headers) do |conn| + configure_connection(conn) end end def sharepoint_connection - Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| - conn.request :json - conn.use :breakers - conn.use Faraday::Response::RaiseError - conn.response :raise_custom_error, error_prefix: service_name - conn.response :json - conn.response :betamocks if mock_enabled? - conn.adapter Faraday.default_adapter + @sharepoint_connection ||= Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| + configure_connection(conn) end end def sharepoint_file_connection - Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| - conn.request :multipart - conn.request :url_encoded - conn.use :breakers - conn.use Faraday::Response::RaiseError - conn.response :raise_custom_error, error_prefix: service_name - conn.response :json - conn.response :betamocks if mock_enabled? - conn.adapter Faraday.default_adapter + @sharepoint_file_connection ||= Faraday.new(url: "https://#{sharepoint_url}", + headers: sharepoint_headers) do |conn| + configure_file_connection(conn) end end + def configure_connection(conn) + conn.request :json + conn.use :breakers + conn.use Faraday::Response::RaiseError + conn.response :raise_custom_error, error_prefix: service_name + conn.response :json + conn.response :betamocks if mock_enabled? + conn.adapter Faraday.default_adapter + end + + def configure_file_connection(conn) + conn.request :multipart + conn.request :url_encoded + configure_connection(conn) + end + ## # HTTP headers for Microsoft Access Control authentication # # @return [Hash] # def auth_headers - { - 'Content-Type' => 'application/x-www-form-urlencoded' - } + { 'Content-Type' => 'application/x-www-form-urlencoded' } end ## @@ -224,12 +242,13 @@ def sharepoint_headers } end + # TODO: we need to set this to VBA def initialize_settings - @settings = Settings.vha.sharepoint + Settings.vha.sharepoint end def mpi_service - @service ||= MPI::Service.new + @mpi_service ||= MPI::Service.new end ## From c2d5c248ca090e7f50bc1b76b80264556086def7 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 16:46:12 -0400 Subject: [PATCH 22/38] one more comment --- .../app/services/simple_forms_api/share_point_service/service.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb index ee1835db1ce..cb66bb9ae67 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb @@ -118,6 +118,7 @@ def upload_to_sharepoint(pdf_path, file_name) end end + # TODO: update this def file_transfer_url(file_name) "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/Files/add(url='#{file_name}.pdf',overwrite=true)" end From b8fe57fb5742a9f49460118655a2b84aef5e1d6b Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 21:54:24 -0400 Subject: [PATCH 23/38] lots of renaming and note taking --- .../simple_forms_api/share_point/client.rb | 190 +++++++++++++ .../share_point_service/service.rb | 265 ------------------ 2 files changed, 190 insertions(+), 265 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb delete mode 100644 modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb new file mode 100644 index 00000000000..01ffd2ab2b5 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb @@ -0,0 +1,190 @@ +# frozen_string_literal: true + +require 'faraday/multipart' + +module SimpleFormsApi + module SharePoint + class Client + extend Forwardable + include Common::Client::Concerns::Monitoring + + class ListItemNotFound < StandardError; end + + # TODO: this is a placeholder; add configuration for OFO/VBA sharepoint access + STATSD_KEY_PREFIX = 'api.ofo.submission_error_remediation.sharepoint.request' + + attr_reader :settings + attr_accessor :access_token + + # TODO: these are placeholders; add configuration for OFO/VBA sharepoint access + def_delegators :settings, :authentication_url, :base_path, :client_id, :client_secret, :resource, :service_name, + :sharepoint_url, :tenant_id + + def initialize + @settings = initialize_settings + @access_token = set_sharepoint_access_token + end + + def upload(form_contents:, form_submission:, station_id:) + upload_response = upload_payload(form_contents:, form_submission:, station_id:) + list_item_id = fetch_list_item_id(upload_response) + + update_sharepoint_item(list_item_id:, form_submission:, station_id:) + rescue => e + handle_upload_error(e) + end + + private + + def set_sharepoint_access_token + auth_response = auth_connection.post("/#{tenant_id}/tokens/OAuth/2", auth_params) + auth_response.body['access_token'] + end + + def auth_params + { + client_id: "#{client_id}@#{tenant_id}", + client_secret:, + grant_type: 'client_credentials', + resource: "#{resource}/#{sharepoint_url}@#{tenant_id}" + } + end + + def upload_payload(form_contents:, form_submission:, station_id:) + payload_path = generate_payload_path(form_contents, form_submission, station_id) + payload_name = build_payload_name + + upload_to_sharepoint(payload_path, payload_name) + ensure + # TODO: will this file be available locally? + File.delete(payload_path) if payload_path + end + + # TODO: update this once OFO/VBA gives guidance + def build_payload_name; end + + # TODO: update this once OFO/VBA gives guidance + def generate_payload_path(form_contents, form_submission, station_id); end + + # TODO: a CSV manifest will need to be included with payload + def build_manifest_to_include_with_payload; end + + # TODO: change this to interface with S3 or an intermediary job + def upload_to_sharepoint(payload_path, payload_name) + with_monitoring do + sharepoint_file_connection.post(file_transfer_url(payload_name)) do |req| + req.headers['Content-Type'] = 'octet/stream' + req.body = Faraday::UploadIO.new(File.open(payload_path), 'octet/stream') + end + end + end + + # TODO: this currently handles a PDF file; determine correct payload url + def file_transfer_url(payload_name) + "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/" \ + "Files/add(url='#{payload_name}.pdf',overwrite=true)" + end + + # Get the ID of the uploaded document's list item + def fetch_list_item_id(pdf_upload_response) + list_item_uri = extract_list_item_uri(pdf_upload_response) + retrieve_list_item_id(list_item_uri) + end + + def extract_list_item_uri(response) + response.body['d']['ListItemAllFields']['__deferred']['uri'] + end + + def retrieve_list_item_id(uri) + path = uri.slice(uri.index(base_path)..-1) + with_monitoring do + response = sharepoint_connection.get(path) + list_item_id = response.body.dig('d', 'ID') + raise ListItemNotFound if list_item_id.nil? + + list_item_id + end + end + + # TODO: this is a holdover from VHA logic and will need changed + def update_sharepoint_item(list_item_id:, form_submission:, station_id:) + path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" + with_monitoring do + sharepoint_connection.post(path) do |req| + req.headers['Content-Type'] = 'application/json;odata=verbose' + req.headers['X-HTTP-METHOD'] = 'MERGE' + req.headers['If-Match'] = '*' + req.body = build_item_payload(form_submission, station_id).to_json + end + end + end + + # TODO: this is a holdover from VHA logic and will need changed + def build_item_payload(form_submission, station_id) + { + '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, + 'StationId' => station_id, + 'UID' => form_submission.id + # 'SSN' => user[:ssn], + # 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" + } + end + + def handle_upload_error(error) + StatsD.increment("#{STATSD_KEY_PREFIX}.failure") + Rails.logger.error('SharePoint upload failed', error.message) + raise error + end + + def auth_connection + @auth_connection ||= new_connection(headers: auth_headers) + end + + def sharepoint_connection + @sharepoint_connection ||= new_connection + end + + def sharepoint_file_connection + @sharepoint_file_connection ||= new_connection(configuration: :file_connection) + end + + def new_connection(url: "https://#{sharepoint_url}", headers: sharepoint_headers, configuration: :connection) + Faraday.new(url:, headers:) { |conn| method(configuration).call(conn) } + end + + def connection(connection) + connection.request :json + connection.use :breakers + connection.use Faraday::Response::RaiseError + connection.response :raise_custom_error, error_prefix: service_name + connection.response :json + connection.response :betamocks if mock_enabled? + connection.adapter Faraday.default_adapter + end + + def file_connection(connection) + connection.request :multipart + connection.request :url_encoded + configure_connection(connection) + end + + # HTTP headers for Microsoft Access Control authentication + def auth_headers + { 'Content-Type' => 'application/x-www-form-urlencoded' } + end + + # HTTP headers for uploading documents to SharePoint + def sharepoint_headers + { + 'Authorization' => "Bearer #{access_token}", + 'Accept' => 'application/json;odata=verbose' + } + end + + # TODO: this is a placeholder; add configuration for OFO/VBA sharepoint access + def initialize_settings + Settings.ofo.sharepoint + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb deleted file mode 100644 index cb66bb9ae67..00000000000 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point_service/service.rb +++ /dev/null @@ -1,265 +0,0 @@ -# frozen_string_literal: true - -require 'faraday/multipart' - -module SimpleFormsApi - module SharePointService - class Service - extend Forwardable - include Common::Client::Concerns::Monitoring - - class ListItemNotFound < StandardError; end - - STATSD_KEY_PREFIX = 'api.vha.financial_status_report.sharepoint.request' - - attr_reader :settings - attr_accessor :access_token, :user - - def_delegators :settings, :sharepoint_url, :client_id, :client_secret, :tenant_id, :resource, :service_name, - :base_path, :authentication_url - - def initialize - @settings = initialize_settings - @access_token = set_sharepoint_access_token - end - - ## - # Upload a PDF file to VHA SharePoint - # - # @param form_contents [Hash] - The JSON of the form - # @param form_submission [Form5655Submission] - Persisted submission of the form - # @param station_id [String] - The VHA station the form belongs to - # - # @return [Faraday::Response] - Response from SharePoint upload - # - def upload(form_contents:, form_submission:, station_id:) - set_user_data(form_submission.user_account_id) - upload_response = upload_pdf(form_contents:, form_submission:, station_id:) - list_item_id = fetch_list_item_id(upload_response) - - update_sharepoint_item(list_item_id:, form_submission:, station_id:) - rescue => e - handle_upload_error(e) - end - - private - - ## - # Set the access token for SharePoint authentication from Microsoft Access Control - # - # @return [String] - The access token - # - def set_sharepoint_access_token - auth_response = auth_connection.post("/#{tenant_id}/tokens/OAuth/2", auth_params) - auth_response.body['access_token'] - end - - def auth_params - { - grant_type: 'client_credentials', - client_id: "#{client_id}@#{tenant_id}", - client_secret:, - resource: "#{resource}/#{sharepoint_url}@#{tenant_id}" - } - end - - def set_user_data(user_account_id) - user_account = UserAccount.find(user_account_id) - user_profile = fetch_user_profile(user_account.icn) - @user = extract_user_info(user_profile) - end - - # TODO: what is MPI service and can we use it? - def fetch_user_profile(icn) - mpi_service.find_profile_by_identifier(identifier: icn, identifier_type: MPI::Constants::ICN) - end - - def extract_user_info(profile) - { - ssn: profile.ssn, - first_name: profile.given_names.first, - last_name: profile.family_name - } - end - - ## - # Upload PDF document to SharePoint site - # - # @param form_contents [Hash] - Contents to fill form with - # @param form_submission [Form5655Submission] - Persisted form - # @param station_id [String] - VHA Station identifier - # - # @return [Faraday::Response] - # - def upload_pdf(form_contents:, form_submission:, station_id:) - pdf_path = generate_pdf_path(form_contents, form_submission, station_id) - file_name = build_file_name(user) - - upload_to_sharepoint(pdf_path, file_name) - ensure - File.delete(pdf_path) if pdf_path - end - - # TODO: this needs to change - def generate_pdf_path(form_contents, form_submission, station_id) - PdfFill::Filler.fill_ancillary_form(form_contents, "#{form_submission.id}-#{station_id}", '5655') - end - - def build_file_name(user) - "#{DateTime.now.strftime('%Y%m%dT%H%M%S')}_#{user[:ssn].last(4)}_#{user[:last_name].tr(' ', '_')}" - end - - def upload_to_sharepoint(pdf_path, file_name) - with_monitoring do - sharepoint_file_connection.post(file_transfer_url(file_name)) do |req| - req.headers['Content-Type'] = 'octet/stream' - req.body = Faraday::UploadIO.new(File.open(pdf_path), 'octet/stream') - end - end - end - - # TODO: update this - def file_transfer_url(file_name) - "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/Files/add(url='#{file_name}.pdf',overwrite=true)" - end - - ## - # Get the ID of the uploaded document's list item - # - # @param pdf_upload_response [Faraday::Response] - Network response from initial upload - # - # @return [Integer] - # - def fetch_list_item_id(pdf_upload_response) - list_item_uri = extract_list_item_uri(pdf_upload_response) - retrieve_list_item_id(list_item_uri) - end - - def extract_list_item_uri(response) - response.body['d']['ListItemAllFields']['__deferred']['uri'] - end - - def retrieve_list_item_id(uri) - path = uri.slice(uri.index(base_path)..-1) - with_monitoring do - response = sharepoint_connection.get(path) - list_item_id = response.body.dig('d', 'ID') - raise ListItemNotFound if list_item_id.nil? - - list_item_id - end - end - - ## - # Populate SharePoint list item fields with VHA data - # - # @param list_item_id [Integer] - ID of SharePoint list item - # @param form_submission [Form5655Submission] - Persisted form - # @param station_id [String] - VHA Station identifier - # - # @return [Faraday::Response] - # - def update_sharepoint_item(list_item_id:, form_submission:, station_id:) - path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" - with_monitoring do - sharepoint_connection.post(path) do |req| - req.headers['Content-Type'] = 'application/json;odata=verbose' - req.headers['X-HTTP-METHOD'] = 'MERGE' - req.headers['If-Match'] = '*' - req.body = build_item_payload(form_submission, station_id).to_json - end - end - end - - def build_item_payload(form_submission, station_id) - { - '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, - 'StationId' => station_id, - 'UID' => form_submission.id, - 'SSN' => user[:ssn], - 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" - } - end - - def handle_upload_error(error) - StatsD.increment("#{STATSD_KEY_PREFIX}.failure") - Rails.logger.error('SharePoint upload failed', error.message) - raise error - end - - def auth_connection - @auth_connection ||= Faraday.new(url: authentication_url, headers: auth_headers) do |conn| - configure_connection(conn) - end - end - - def sharepoint_connection - @sharepoint_connection ||= Faraday.new(url: "https://#{sharepoint_url}", headers: sharepoint_headers) do |conn| - configure_connection(conn) - end - end - - def sharepoint_file_connection - @sharepoint_file_connection ||= Faraday.new(url: "https://#{sharepoint_url}", - headers: sharepoint_headers) do |conn| - configure_file_connection(conn) - end - end - - def configure_connection(conn) - conn.request :json - conn.use :breakers - conn.use Faraday::Response::RaiseError - conn.response :raise_custom_error, error_prefix: service_name - conn.response :json - conn.response :betamocks if mock_enabled? - conn.adapter Faraday.default_adapter - end - - def configure_file_connection(conn) - conn.request :multipart - conn.request :url_encoded - configure_connection(conn) - end - - ## - # HTTP headers for Microsoft Access Control authentication - # - # @return [Hash] - # - def auth_headers - { 'Content-Type' => 'application/x-www-form-urlencoded' } - end - - ## - # HTTP headers for uploading documents to SharePoint - # - # @return [Hash] - # - def sharepoint_headers - { - 'Authorization' => "Bearer #{access_token}", - 'Accept' => 'application/json;odata=verbose' - } - end - - # TODO: we need to set this to VBA - def initialize_settings - Settings.vha.sharepoint - end - - def mpi_service - @mpi_service ||= MPI::Service.new - end - - ## - # Betamocks enabled status from settings - # - # @return [Boolean] - # - def mock_enabled? - settings.mock || false - end - end - end -end From 8206250e0f021d532515e467f42e0eb939183337 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 21:59:22 -0400 Subject: [PATCH 24/38] more s3 changes --- .../simple_forms_api/v1/uploads_controller.rb | 2 +- .../jobs/submission_archive_handler_job.rb | 4 ++-- .../s3_service/submission_archive_handler.rb | 2 +- .../s3_service/submission_archiver.rb | 21 +------------------ .../simple_forms_api/s3_service/utils.rb | 2 +- .../submission_archiver_spec.rb} | 2 +- 6 files changed, 7 insertions(+), 26 deletions(-) rename modules/simple_forms_api/spec/services/{s3_service/archive_submission_to_pdf_spec.rb => s3/submission_archiver_spec.rb} (98%) diff --git a/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb b/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb index d14ec82bcbb..999f9b86407 100644 --- a/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb +++ b/modules/simple_forms_api/app/controllers/simple_forms_api/v1/uploads_controller.rb @@ -167,7 +167,7 @@ def upload_pdf(file_path, metadata, form) location, uuid, submission_attempt = prepare_for_upload(form, file_path) log_upload_details(location, uuid) response = perform_pdf_upload(location, file_path, metadata, form) - SimpleFormsApi::S3Service::SubmissionArchiveHandlerJob.perform_async( + SimpleFormsApi::S3::SubmissionArchiveHandlerJob.perform_async( submission_ids: [submission_attempt.form_submission.id], metadata:, file_path: diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb index 2a053a86626..bb23a4d7f96 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb @@ -1,9 +1,9 @@ # frozen_string_literal: true module SimpleFormsApi - module S3Service + module S3 module Jobs - class SubmissionArchiveHandlerJob < SimpleFormsApi::S3Service::Utils + class SubmissionArchiveHandlerJob < SimpleFormsApi::S3::Utils include Sidekiq::Worker sidekiq_options retry: 3, queue: 'default' diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb index 6e8aecc6d40..d3f8c67958b 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module SimpleFormsApi - module S3Service + module S3 class SubmissionArchiveHandler < Utils attr_reader :attachments, :benefits_intake_uuids, :parent_dir, :successes, :failures, :bundle_by_user diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb index 69452294d25..31232ce366d 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb @@ -1,26 +1,7 @@ # frozen_string_literal: true -# To use -# ids = -# parent_dir = -# -# to see your archive in s3 -# 1. go here https://console.amazonaws-us-gov.com/s3/home?region=us-gov-west-1# -# 2. login with 2fa -# 3. search for dsva-vetsgov-prod-reports -# 4. search for your parent_dir name, e.g. 526archive_aug_21st_2024 -# -# If you do not provide a parent_dir, the script defaults to a folder called vff-simple-forms -# -# OPTION 1: Run the script with user groupings -# - requires SubmissionDuplicateReport object -# - SubmissionArchiveHandler.new(submission_ids: ids, parent_dir:).run -# -# OPTION 2: Run without user groupings -# ids.each { |id| SubmissionArchiver.new(submission_id: id, parent_dir:).run } -# this will just put each submission in a folder by it's id under the parent dir module SimpleFormsApi - module S3Service + module S3 class SubmissionArchiver < Utils attr_reader :benefits_intake_uuid, :include_json_archive, :include_text_archive, :metadata, :parent_dir, :submission diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb index b7f576bad02..3676a1e5dd9 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true module SimpleFormsApi - module S3Service + module S3 class Utils private diff --git a/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb b/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb similarity index 98% rename from modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb rename to modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb index 27bf435b79f..e4e4d3e07e9 100644 --- a/modules/simple_forms_api/spec/services/s3_service/archive_submission_to_pdf_spec.rb +++ b/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb @@ -3,7 +3,7 @@ require 'rails_helper' require SimpleFormsApi::Engine.root.join('spec', 'spec_helper.rb') -RSpec.describe SimpleFormsApi::S3Service::SubmissionArchiver, type: :model do +RSpec.describe SimpleFormsApi::S3::SubmissionArchiver, type: :model do let(:submission_id) { 1 } let(:form_id) { '21-10210' } let(:form_data) { File.read('modules/simple_forms_api/spec/fixtures/form_json/vba_21_10210.json') } From f006b3cf2de30baad892ece16219a56905873654 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 22:14:00 -0400 Subject: [PATCH 25/38] more renaming --- .../jobs/submission_archive_handler_job.rb | 6 ++---- .../{s3_service => s3}/submission_archive_handler.rb | 6 ++---- .../{s3_service => s3}/submission_archiver.rb | 11 +++++------ .../simple_forms_api/{s3_service => s3}/utils.rb | 0 4 files changed, 9 insertions(+), 14 deletions(-) rename modules/simple_forms_api/app/services/simple_forms_api/{s3_service => s3}/jobs/submission_archive_handler_job.rb (79%) rename modules/simple_forms_api/app/services/simple_forms_api/{s3_service => s3}/submission_archive_handler.rb (93%) rename modules/simple_forms_api/app/services/simple_forms_api/{s3_service => s3}/submission_archiver.rb (93%) rename modules/simple_forms_api/app/services/simple_forms_api/{s3_service => s3}/utils.rb (100%) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/submission_archive_handler_job.rb similarity index 79% rename from modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/submission_archive_handler_job.rb index bb23a4d7f96..848c6e70da6 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/jobs/submission_archive_handler_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/submission_archive_handler_job.rb @@ -22,12 +22,10 @@ def perform(benefits_intake_uuids:, **options) def default_options { - attachments: [], - bundle_by_user: true, + attachments: [], # an array of attachment confirmation codes file_path: nil, # file path for the PDF file to be archived metadata: {}, # pertinent metadata for original file upload/submission - parent_dir: 'vff-simple-forms', # S3 bucket base directory where files live - signed_link: false # TODO: Will we ever need to make this optional? + parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live } end end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb similarity index 93% rename from modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb index d3f8c67958b..04b4a89e193 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb @@ -3,13 +3,12 @@ module SimpleFormsApi module S3 class SubmissionArchiveHandler < Utils - attr_reader :attachments, :benefits_intake_uuids, :parent_dir, :successes, :failures, :bundle_by_user + attr_reader :attachments, :benefits_intake_uuids, :parent_dir, :metadata, :file_path def initialize(benefits_intake_uuids: [], **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) @benefits_intake_uuids = benefits_intake_uuids - @failures = [] assign_instance_variables(defaults) end @@ -24,8 +23,7 @@ def run def default_options { - attachments: [], - bundle_by_user: true, + attachments: [], # an array of attachment confirmation codes file_path: nil, # file path for the PDF file to be archived metadata: {}, # pertinent metadata for original file upload/submission parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb similarity index 93% rename from modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index 31232ce366d..83c32fa4c66 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -4,13 +4,13 @@ module SimpleFormsApi module S3 class SubmissionArchiver < Utils attr_reader :benefits_intake_uuid, :include_json_archive, :include_text_archive, :metadata, :parent_dir, - :submission + :submission, :file_path class << self def fetch_presigned_url(benefits_intake_uuid) instance = self.class.new(benefits_intake_uuid:) - instance.fetch_pdf(benefits_intake_uuid, form_number) - # TODO: return presigned_url from object + pdf = instance.fetch_pdf(benefits_intake_uuid) + sign_s3_file_url(pdf) end end @@ -57,6 +57,7 @@ def write_pdf "#{output_directory_path}/form_#{submission.form_data['form_number']}.pdf", Base64.decode64(encoded_pdf) ) + # TODO: do we want to immediately sign the pdf? sign_s3_file_url(pdf) end @@ -72,9 +73,7 @@ def fetch_pdf end def sign_s3_file_url(pdf) - signed_url = pdf.presigned_url(:get, expires_in: 30.minutes.to_i) - # TODO: How do we want to handle this? - # submission.form_submission_attempts&.last&.update(signed_url:) + pdf.presigned_url(:get, expires_in: 30.minutes.to_i) end def error_details(error) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb similarity index 100% rename from modules/simple_forms_api/app/services/simple_forms_api/s3_service/utils.rb rename to modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb From eca30a8702df49ac4007759637b9b6968d7a8bd5 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Tue, 10 Sep 2024 22:29:58 -0400 Subject: [PATCH 26/38] manifest notes --- .../services/simple_forms_api/s3/submission_archiver.rb | 9 +++++++-- .../app/services/simple_forms_api/share_point/client.rb | 7 ++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index 83c32fa4c66..97de5bcc1f7 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -3,8 +3,8 @@ module SimpleFormsApi module S3 class SubmissionArchiver < Utils - attr_reader :benefits_intake_uuid, :include_json_archive, :include_text_archive, :metadata, :parent_dir, - :submission, :file_path + attr_reader :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, :include_text_archive, + :metadata, :parent_dir, :submission class << self def fetch_presigned_url(benefits_intake_uuid) @@ -37,6 +37,7 @@ def default_options attachments: [], # an array of attachment confirmation codes file_path: nil, # file path for the PDF file to be archived include_json_archive: true, # include the form data as a JSON object + include_manifest: true, # include a CSV file containing Veteran ID & original submission datetime include_text_archive: true, # include the form data as a text file metadata: {}, # pertinent metadata for original file upload/submission parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live @@ -48,6 +49,7 @@ def process_submission_files write_as_json_archive if include_json_archive write_as_text_archive if include_text_archive write_attachments if attachments.present? + write_manifest if include_manifest write_metadata end @@ -102,6 +104,9 @@ def write_attachments handle_upload_error(e) end + # TODO: add this + def write_manifest; end + def process_attachment(attachment) log_info("Processing attachment: #{attachment}") local_file = PersistentAttachment.find_by(guid: attachment) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb index 01ffd2ab2b5..56403c88575 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb @@ -66,10 +66,7 @@ def build_payload_name; end # TODO: update this once OFO/VBA gives guidance def generate_payload_path(form_contents, form_submission, station_id); end - # TODO: a CSV manifest will need to be included with payload - def build_manifest_to_include_with_payload; end - - # TODO: change this to interface with S3 or an intermediary job + # TODO: change this to interface with S3 or an intermediary job/service def upload_to_sharepoint(payload_path, payload_name) with_monitoring do sharepoint_file_connection.post(file_transfer_url(payload_name)) do |req| @@ -79,7 +76,7 @@ def upload_to_sharepoint(payload_path, payload_name) end end - # TODO: this currently handles a PDF file; determine correct payload url + # TODO: this is currently configured for a PDF file; determine correct payload url def file_transfer_url(payload_name) "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/" \ "Files/add(url='#{payload_name}.pdf',overwrite=true)" From fff0df32e22f91c97f993d749a01691c737f17f8 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 10:01:35 -0400 Subject: [PATCH 27/38] archiver optimizations, make one s3 call instead of many --- .../s3/submission_archiver.rb | 94 +++++++++++++------ .../simple_forms_api/share_point/client.rb | 10 +- 2 files changed, 70 insertions(+), 34 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index 97de5bcc1f7..b4bc9854c7d 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -1,5 +1,8 @@ # frozen_string_literal: true +require 'csv' +require 'fileutils' + module SimpleFormsApi module S3 class SubmissionArchiver < Utils @@ -18,13 +21,24 @@ def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop: defaults = default_options.merge(options) @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) + raise 'Submission was not found' unless submission + + @benefits_intake_uuid = submission.benefits_intake_uuid assign_instance_variables(defaults) end def run - log_info("Processing submission ID: #{submission.id}") + log_info("Processing submission: #{benefits_intake_uuid}") + + FileUtils.mkdir_p(temp_directory_path) + process_submission_files + + upload_temp_folder_to_s3 + + FileUtils.rm_f(temp_directory_path) + output_directory_path rescue => e handle_error("Failed submission: #{submission.id}", e, { submission_id: submission.id, benefits_intake_uuid: }) @@ -54,13 +68,21 @@ def process_submission_files end def write_pdf - encoded_pdf = generate_pdf_content - pdf = save_file_to_s3( - "#{output_directory_path}/form_#{submission.form_data['form_number']}.pdf", - Base64.decode64(encoded_pdf) - ) - # TODO: do we want to immediately sign the pdf? - sign_s3_file_url(pdf) + write_tempfile(submission_pdf_filename, Base64.decode64(generate_pdf_content)) + end + + def upload_temp_folder_to_s3 + Find.find(temp_directory_path) do |path| + next if File.directory?(path) + + relative_path = path.sub(temp_directory_path, '') + s3_path = "#{output_directory_path}/#{relative_path}" + + File.open(path, 'rb') do |file| + pdf = save_file_to_s3(s3_path, file.read) + sign_s3_file_url(pdf) if relative_path == submission_pdf_filename + end + end end def generate_pdf_content @@ -70,10 +92,14 @@ def generate_pdf_content end def fetch_pdf - path = "#{output_directory_path}/form_#{submission.form_data['form_number']}.pdf" + path = "#{output_directory_path}/#{submission_pdf_filename}" s3_resource.bucket(target_bucket).object(path) end + def submission_pdf_filename + @submission_pdf_filename ||= "form_#{submission.form_data['form_number']}.pdf" + end + def sign_s3_file_url(pdf) pdf.presigned_url(:get, expires_in: 30.minutes.to_i) end @@ -84,55 +110,61 @@ def error_details(error) def write_as_json_archive form_json = JSON.parse(submission.form_data) - save_file_to_s3("#{output_directory_path}/form_text_archive.json", JSON.pretty_generate(form_json)) + write_tempfile('form_text_archive.json', JSON.pretty_generate(form_json)) end def write_as_text_archive form_text_archive = submission.form_data['claimDate'] ||= submission.created_at.iso8601 - save_file_to_s3("#{output_directory_path}/form_text_archive.txt", form_text_archive.to_json) + write_tempfile('form_text_archive.txt', form_text_archive.to_json) end def write_metadata - save_file_to_s3("#{output_directory_path}/metadata.json", metadata.to_json) + write_tempfile('metadata.json', metadata.to_json) end def write_attachments - log_info("Moving #{attachments.count} attachments") - attachments.each { |upload| process_attachment(upload) } + log_info("Processing #{attachments.count} attachments") + attachments.each_with_index { |upload, i| process_attachment(i + 1, upload) } write_attachment_failure_report if attachment_failures.present? rescue => e handle_upload_error(e) end - # TODO: add this - def write_manifest; end + def write_manifest + veteran_id = metadata['fileNumber'] + submission_datetime = submission.created_at + file_name = "submission_#{benefits_intake_uuid}_#{submission_datetime}_manifest.csv" + + "#{temp_directory_path}#{file_name}".tap do |file_path| + CSV.open(file_path, 'wb') do |csv| + csv << ['Veteran ID', 'Submission DateTime'] + csv << [veteran_id, submission_datetime] + end + end + end + + def write_tempfile(file_name, payload) + File.write("#{temp_directory_path}#{file_name}", payload) + end - def process_attachment(attachment) - log_info("Processing attachment: #{attachment}") + def process_attachment(attach_num, attachment) + log_info("Processing attachment ##{attach_num}: #{attachment}") local_file = PersistentAttachment.find_by(guid: attachment) raise 'Local record not found' unless local_file - copy_file_between_buckets(local_file) + write_tempfile("attachment_#{attach_num}.pdf", local_file.to_pdf) rescue => e attachment_failures << e handle_error('Attachment failure.', e) raise e end - def copy_file_between_buckets(local_file) - source_obj = s3_resource.bucket(local_file.get_file.uploader.aws_bucket).object(local_file.get_file.path) - target_obj = s3_resource.bucket(target_bucket).object("#{attachment_path}/#{local_file.get_file.filename}") - target_obj.copy_from(source_obj) - end - def write_attachment_failure_report - save_file_to_s3("#{output_directory_path}/attachment_failures.txt", JSON.pretty_generate(attachment_failures)) + write_tempfile('attachment_failures.txt', JSON.pretty_generate(attachment_failures)) end def save_file_to_s3(path, content) - s3_resource.bucket(target_bucket).object(path).tap do |obj| - obj.put(body: content) - end + s3_resource.bucket(target_bucket).object(path).tap { |obj| obj.put(body: content) } end def output_directory_path @@ -143,6 +175,10 @@ def attachment_failures @attachment_failures ||= [] end + def temp_directory_path + @temp_directory_path ||= Rails.root.join("tmp/#{benefits_intake_uuid}-#{SecureRandom.hex}/").to_s + end + def attachment_path @attachment_path ||= "#{output_directory_path}/attachments" end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb index 56403c88575..84dc3838d5f 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb @@ -51,7 +51,7 @@ def auth_params end def upload_payload(form_contents:, form_submission:, station_id:) - payload_path = generate_payload_path(form_contents, form_submission, station_id) + payload_path = generate_payload_path payload_name = build_payload_name upload_to_sharepoint(payload_path, payload_name) @@ -64,7 +64,7 @@ def upload_payload(form_contents:, form_submission:, station_id:) def build_payload_name; end # TODO: update this once OFO/VBA gives guidance - def generate_payload_path(form_contents, form_submission, station_id); end + def generate_payload_path; end # TODO: change this to interface with S3 or an intermediary job/service def upload_to_sharepoint(payload_path, payload_name) @@ -76,10 +76,10 @@ def upload_to_sharepoint(payload_path, payload_name) end end - # TODO: this is currently configured for a PDF file; determine correct payload url + # TODO: confirm this is the correct payload url def file_transfer_url(payload_name) "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/" \ - "Files/add(url='#{payload_name}.pdf',overwrite=true)" + "Files/add(url='#{payload_name}.zip',overwrite=true)" end # Get the ID of the uploaded document's list item @@ -103,8 +103,8 @@ def retrieve_list_item_id(uri) end end - # TODO: this is a holdover from VHA logic and will need changed def update_sharepoint_item(list_item_id:, form_submission:, station_id:) + # TODO: this is a placeholder path and will need to be changed path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" with_monitoring do sharepoint_connection.post(path) do |req| From 1e137b0ece8f954cfaf19fe861e9b9826fd4523b Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 10:22:01 -0400 Subject: [PATCH 28/38] more cleanup --- .../simple_forms_api/s3/submission_archiver.rb | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index b4bc9854c7d..f183b2b4c39 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -147,12 +147,12 @@ def write_tempfile(file_name, payload) File.write("#{temp_directory_path}#{file_name}", payload) end - def process_attachment(attach_num, attachment) - log_info("Processing attachment ##{attach_num}: #{attachment}") - local_file = PersistentAttachment.find_by(guid: attachment) - raise 'Local record not found' unless local_file + def process_attachment(attachment_number, guid) + log_info("Processing attachment ##{attachment_number}: #{guid}") + attachment = PersistentAttachment.find_by(guid:).to_pdf + raise 'Local record not found' unless attachment - write_tempfile("attachment_#{attach_num}.pdf", local_file.to_pdf) + write_tempfile("attachment_#{attachment_number}.pdf", attachment) rescue => e attachment_failures << e handle_error('Attachment failure.', e) @@ -178,10 +178,6 @@ def attachment_failures def temp_directory_path @temp_directory_path ||= Rails.root.join("tmp/#{benefits_intake_uuid}-#{SecureRandom.hex}/").to_s end - - def attachment_path - @attachment_path ||= "#{output_directory_path}/attachments" - end end end end From 195110f2f2a81db06d0f5a2c2028ec3264d8dee0 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 10:28:49 -0400 Subject: [PATCH 29/38] split up share point logic --- .../share_point/archive_uploader.rb | 81 +++++++++++++++++++ .../simple_forms_api/share_point/client.rb | 70 ---------------- 2 files changed, 81 insertions(+), 70 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb new file mode 100644 index 00000000000..a7ec6886f34 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +require 'faraday/multipart' + +module SimpleFormsApi + module SharePoint + class ArchiveUploader < Client + def upload(form_contents:, form_submission:, station_id:) + upload_response = upload_payload(form_contents:, form_submission:, station_id:) + list_item_id = fetch_list_item_id(upload_response) + + update_sharepoint_item(list_item_id:, form_submission:, station_id:) + rescue => e + handle_upload_error(e) + end + + private + + def upload_payload(form_contents:, form_submission:, station_id:) + payload_path = generate_payload_path + payload_name = build_payload_name + + upload_to_sharepoint(payload_path, payload_name) + ensure + # TODO: will this file be available locally? + File.delete(payload_path) if payload_path + end + + # TODO: update this once OFO/VBA gives guidance + def build_payload_name; end + + # TODO: update this once OFO/VBA gives guidance + def generate_payload_path; end + + # Get the ID of the uploaded document's list item + def fetch_list_item_id(pdf_upload_response) + list_item_uri = extract_list_item_uri(pdf_upload_response) + retrieve_list_item_id(list_item_uri) + end + + def extract_list_item_uri(response) + response.body['d']['ListItemAllFields']['__deferred']['uri'] + end + + def retrieve_list_item_id(uri) + path = uri.slice(uri.index(base_path)..-1) + with_monitoring do + response = sharepoint_connection.get(path) + list_item_id = response.body.dig('d', 'ID') + raise ListItemNotFound if list_item_id.nil? + + list_item_id + end + end + + def update_sharepoint_item(list_item_id:, form_submission:, station_id:) + # TODO: this is a placeholder path and will need to be changed + path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" + with_monitoring do + sharepoint_connection.post(path) do |req| + req.headers['Content-Type'] = 'application/json;odata=verbose' + req.headers['X-HTTP-METHOD'] = 'MERGE' + req.headers['If-Match'] = '*' + req.body = build_item_payload(form_submission, station_id).to_json + end + end + end + + # TODO: this is a holdover from VHA logic and will need changed + def build_item_payload(form_submission, station_id) + { + '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, + 'StationId' => station_id, + 'UID' => form_submission.id + # 'SSN' => user[:ssn], + # 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" + } + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb index 84dc3838d5f..e79d8801f21 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb @@ -25,15 +25,6 @@ def initialize @access_token = set_sharepoint_access_token end - def upload(form_contents:, form_submission:, station_id:) - upload_response = upload_payload(form_contents:, form_submission:, station_id:) - list_item_id = fetch_list_item_id(upload_response) - - update_sharepoint_item(list_item_id:, form_submission:, station_id:) - rescue => e - handle_upload_error(e) - end - private def set_sharepoint_access_token @@ -50,22 +41,6 @@ def auth_params } end - def upload_payload(form_contents:, form_submission:, station_id:) - payload_path = generate_payload_path - payload_name = build_payload_name - - upload_to_sharepoint(payload_path, payload_name) - ensure - # TODO: will this file be available locally? - File.delete(payload_path) if payload_path - end - - # TODO: update this once OFO/VBA gives guidance - def build_payload_name; end - - # TODO: update this once OFO/VBA gives guidance - def generate_payload_path; end - # TODO: change this to interface with S3 or an intermediary job/service def upload_to_sharepoint(payload_path, payload_name) with_monitoring do @@ -82,51 +57,6 @@ def file_transfer_url(payload_name) "Files/add(url='#{payload_name}.zip',overwrite=true)" end - # Get the ID of the uploaded document's list item - def fetch_list_item_id(pdf_upload_response) - list_item_uri = extract_list_item_uri(pdf_upload_response) - retrieve_list_item_id(list_item_uri) - end - - def extract_list_item_uri(response) - response.body['d']['ListItemAllFields']['__deferred']['uri'] - end - - def retrieve_list_item_id(uri) - path = uri.slice(uri.index(base_path)..-1) - with_monitoring do - response = sharepoint_connection.get(path) - list_item_id = response.body.dig('d', 'ID') - raise ListItemNotFound if list_item_id.nil? - - list_item_id - end - end - - def update_sharepoint_item(list_item_id:, form_submission:, station_id:) - # TODO: this is a placeholder path and will need to be changed - path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" - with_monitoring do - sharepoint_connection.post(path) do |req| - req.headers['Content-Type'] = 'application/json;odata=verbose' - req.headers['X-HTTP-METHOD'] = 'MERGE' - req.headers['If-Match'] = '*' - req.body = build_item_payload(form_submission, station_id).to_json - end - end - end - - # TODO: this is a holdover from VHA logic and will need changed - def build_item_payload(form_submission, station_id) - { - '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, - 'StationId' => station_id, - 'UID' => form_submission.id - # 'SSN' => user[:ssn], - # 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" - } - end - def handle_upload_error(error) StatsD.increment("#{STATSD_KEY_PREFIX}.failure") Rails.logger.error('SharePoint upload failed', error.message) From 6b8c71347d173fd06694299e6fb1863d1d48177f Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 11:09:50 -0400 Subject: [PATCH 30/38] add job to handle moving S3 stuff to SharePoint --- .../s3/jobs/archive_uploader_job.rb | 42 +++++++++++++++++++ .../s3/submission_archiver.rb | 28 +++++++++++-- .../share_point/archive_uploader.rb | 3 +- 3 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb new file mode 100644 index 00000000000..9586861fab4 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +require 'zip' +require 'aws-sdk-s3' + +module SimpleFormsApi + module S3 + module Jobs + class ArchiveUploaderJob < SimpleFormsApi::S3::Utils + include Sidekiq::Worker + + sidekiq_options retry: 3, queue: 'default' + + def perform(benefits_intake_uuid:) + @benefits_intake_uuid = benefits_intake_uuid + + @zip_path = SubmissionArchiver.fetch_s3_submission(benefits_intake_uuid) + + zip_folder + + ArchiveUploader.upload(zip_file_path: @zip_path) + + FileUtils.rm_rf(temp_directory_path) + rescue => e + handle_error('ArchiveUploaderJob failed.', e) + end + + private + + attr_reader :benefits_intake_uuid + + def zip_folder + Zip::File.open(temp_directory_path, Zip::File::CREATE) do |zip_file| + Dir[File.join(temp_directory_path, '**', '**')].each do |file| + zip_file.add(file.sub("#{temp_directory_path}/", ''), file) + end + end + end + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index f183b2b4c39..dfce602792a 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -11,10 +11,22 @@ class SubmissionArchiver < Utils class << self def fetch_presigned_url(benefits_intake_uuid) - instance = self.class.new(benefits_intake_uuid:) - pdf = instance.fetch_pdf(benefits_intake_uuid) + pdf = fetch_submission_pdf(benefits_intake_uuid) sign_s3_file_url(pdf) end + + # TODO: these instance methods are private, assess and update + def fetch_pdf(benefits_intake_uuid) + instance = new(benefits_intake_uuid:) + instance.fetch_submission_pdf(benefits_intake_uuid) + end + + # TODO: these instance methods are private, assess and update + def fetch_s3_submission(benefits_intake_uuid) + instance = new(benefits_intake_uuid:) + instance.download_folder_from_s3 + instance.temp_directory_path + end end def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper @@ -85,13 +97,23 @@ def upload_temp_folder_to_s3 end end + def download_folder_from_s3 + FileUtils.mkdir_p(temp_directory_path) + + s3_resource.bucket.objects(prefix: output_directory_path).each do |object| + local_file_path = File.join(temp_directory_path, object.key.sub(output_directory_path, '')) + FileUtils.mkdir_p(File.dirname("#{temp_directory_path}#{local_file_path}")) + object.get(response_target: local_file_path) + end + end + def generate_pdf_content raise 'Missing PDF file to upload' unless file_path Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) end - def fetch_pdf + def fetch_submission_pdf path = "#{output_directory_path}/#{submission_pdf_filename}" s3_resource.bucket(target_bucket).object(path) end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb index a7ec6886f34..5b24214dde4 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb @@ -5,7 +5,8 @@ module SimpleFormsApi module SharePoint class ArchiveUploader < Client - def upload(form_contents:, form_submission:, station_id:) + # TODO: some/most of these parameters are unnecessary + def upload(form_contents:, form_submission:, station_id:, zip_file_path:) upload_response = upload_payload(form_contents:, form_submission:, station_id:) list_item_id = fetch_list_item_id(upload_response) From df18c4577177f438f776a3245f74dc698186d8c6 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 13:44:41 -0400 Subject: [PATCH 31/38] pull out submission specific logic from archiver --- .../s3/jobs/archive_uploader_job.rb | 19 ++- .../s3/submission_archive_builder.rb | 156 ++++++++++++++++++ .../s3/submission_archive_handler.rb | 2 +- .../s3/submission_archiver.rb | 96 +---------- .../share_point/archive_uploader.rb | 2 +- 5 files changed, 176 insertions(+), 99 deletions(-) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb index 9586861fab4..18c50a47215 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/jobs/archive_uploader_job.rb @@ -1,7 +1,6 @@ # frozen_string_literal: true require 'zip' -require 'aws-sdk-s3' module SimpleFormsApi module S3 @@ -14,11 +13,9 @@ class ArchiveUploaderJob < SimpleFormsApi::S3::Utils def perform(benefits_intake_uuid:) @benefits_intake_uuid = benefits_intake_uuid - @zip_path = SubmissionArchiver.fetch_s3_submission(benefits_intake_uuid) - - zip_folder - - ArchiveUploader.upload(zip_file_path: @zip_path) + temp_directory_path = fetch_s3_folder + zip_temp_folder(temp_directory_path) + upload_s3_folder_to_sharepoint(temp_directory_path) FileUtils.rm_rf(temp_directory_path) rescue => e @@ -29,13 +26,21 @@ def perform(benefits_intake_uuid:) attr_reader :benefits_intake_uuid - def zip_folder + def zip_temp_folder(temp_directory_path) Zip::File.open(temp_directory_path, Zip::File::CREATE) do |zip_file| Dir[File.join(temp_directory_path, '**', '**')].each do |file| zip_file.add(file.sub("#{temp_directory_path}/", ''), file) end end end + + def fetch_s3_folder + SimpleFormsApi::S3::SubmissionArchiver.fetch_s3_submission(benefits_intake_uuid) + end + + def upload_s3_folder_to_sharepoint(zip_file_path) + SimpleFormsApi::SharePoint::ArchiveUploader.upload(benefits_intake_uuid:, zip_file_path:) + end end end end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb new file mode 100644 index 00000000000..b0cba53e113 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb @@ -0,0 +1,156 @@ +# frozen_string_literal: true + +require 'csv' +require 'fileutils' + +module SimpleFormsApi + module S3 + class SubmissionArchiveBuilder < Utils + attr_reader :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, :include_text_archive, + :metadata, :parent_dir, :submission + + def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper + defaults = default_options.merge(options) + + @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) + raise 'Submission was not found' unless submission + + @benefits_intake_uuid = submission.benefits_intake_uuid + + assign_instance_variables(defaults) + end + + def run + FileUtils.mkdir_p(temp_directory_path) + + process_submission_files + + temp_directory_path + rescue => e + handle_error("Failed building submission: #{submission.id}", e, { benefits_intake_uuid: }) + end + + private + + def default_options + { + attachments: [], # an array of attachment confirmation codes + file_path: nil, # file path for the PDF file to be archived + include_json_archive: true, # include the form data as a JSON object + include_manifest: true, # include a CSV file containing Veteran ID & original submission datetime + include_text_archive: true, # include the form data as a text file + metadata: {}, # pertinent metadata for original file upload/submission + parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live + } + end + + def process_submission_files + write_pdf + write_as_json_archive if include_json_archive + write_as_text_archive if include_text_archive + write_attachments if attachments.present? + write_manifest if include_manifest + write_metadata + end + + def write_pdf + write_tempfile(submission_pdf_filename, Base64.decode64(generate_pdf_content)) + end + + def generate_pdf_content + regenerate_pdf_submission unless file_path + + Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) + end + + # TODO: this will be pulled out to be more team agnostic + def regenerate_pdf_submission + form_number = SimpleFormsApi::V1::UploadsController::FORM_NUMBER_MAP[submission.form_type] + parsed_form_data = JSON.parse(submission.form_data) + form = "SimpleFormsApi::#{form_number.titleize.gsub(' ', '')}".constantize.new(parsed_form_data) + filler = SimpleFormsApi::PdfFiller.new(form_number:, form:) + + @file_path = filler.generate(timestamp: submission.created_at) + @metadata = SimpleFormsApiSubmission::MetadataValidator.validate( + form.metadata, + zip_code_is_us_based: form.zip_code_is_us_based + ) + + form.handle_attachments(file_path) if %w[vba_40_0247 vba_20_10207 vba_40_10007].include? form_number + + @attachments = form.get_attachments if form_number == 'vba_20_10207' + end + + def submission_pdf_filename + @submission_pdf_filename ||= "form_#{submission.form_data['form_number']}.pdf" + end + + def error_details(error) + "#{error.message}\n\n#{error.backtrace.join("\n")}" + end + + def write_as_json_archive + form_json = JSON.parse(submission.form_data) + write_tempfile('form_text_archive.json', JSON.pretty_generate(form_json)) + end + + def write_as_text_archive + form_text_archive = submission.form_data['claimDate'] ||= submission.created_at.iso8601 + write_tempfile('form_text_archive.txt', form_text_archive.to_json) + end + + def write_metadata + write_tempfile('metadata.json', metadata.to_json) + end + + def write_attachments + log_info("Processing #{attachments.count} attachments") + attachments.each_with_index { |upload, i| process_attachment(i + 1, upload) } + write_attachment_failure_report if attachment_failures.present? + rescue => e + handle_upload_error(e) + end + + def write_manifest + veteran_id = metadata['fileNumber'] + submission_datetime = submission.created_at + file_name = "submission_#{benefits_intake_uuid}_#{submission_datetime}_manifest.csv" + + "#{temp_directory_path}#{file_name}".tap do |file_path| + CSV.open(file_path, 'wb') do |csv| + csv << ['Veteran ID', 'Submission DateTime'] + csv << [veteran_id, submission_datetime] + end + end + end + + def write_tempfile(file_name, payload) + File.write("#{temp_directory_path}#{file_name}", payload) + end + + def process_attachment(attachment_number, guid) + log_info("Processing attachment ##{attachment_number}: #{guid}") + attachment = PersistentAttachment.find_by(guid:).to_pdf + raise 'Local record not found' unless attachment + + write_tempfile("attachment_#{attachment_number}.pdf", attachment) + rescue => e + attachment_failures << e + handle_error('Attachment failure.', e) + raise e + end + + def write_attachment_failure_report + write_tempfile('attachment_failures.txt', JSON.pretty_generate(attachment_failures)) + end + + def attachment_failures + @attachment_failures ||= [] + end + + def temp_directory_path + @temp_directory_path ||= Rails.root.join("tmp/#{benefits_intake_uuid}-#{SecureRandom.hex}/").to_s + end + end + end +end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb index 04b4a89e193..a3de977b3a9 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_handler.rb @@ -36,7 +36,7 @@ def submissions def process_individual_submissions submissions.each_with_index do |sub, idx| - message = "Processing submission: #{sub.benefits_intake_uuid} (non-grouped)" \ + message = "Processing submission: #{sub.benefits_intake_uuid} " \ "##{idx + 1} of #{submissions.count} total submissions" log_info(message, benefits_intake_uuid: sub.benefits_intake_uuid, submission_count: submissions.count) process_submission(sub.benefits_intake_uuid) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index dfce602792a..2674054ce07 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -11,7 +11,7 @@ class SubmissionArchiver < Utils class << self def fetch_presigned_url(benefits_intake_uuid) - pdf = fetch_submission_pdf(benefits_intake_uuid) + pdf = fetch_pdf(benefits_intake_uuid) sign_s3_file_url(pdf) end @@ -29,13 +29,11 @@ def fetch_s3_submission(benefits_intake_uuid) end end - def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper + def initialize(benefits_intake_uuid: nil, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) - @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) - raise 'Submission was not found' unless submission - - @benefits_intake_uuid = submission.benefits_intake_uuid + @benefits_intake_uuid = benefits_intake_uuid + @temp_directory_path = build_submission_archive(benefits_intake_uuid:, **defaults) assign_instance_variables(defaults) end @@ -43,10 +41,6 @@ def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop: def run log_info("Processing submission: #{benefits_intake_uuid}") - FileUtils.mkdir_p(temp_directory_path) - - process_submission_files - upload_temp_folder_to_s3 FileUtils.rm_f(temp_directory_path) @@ -70,17 +64,8 @@ def default_options } end - def process_submission_files - write_pdf - write_as_json_archive if include_json_archive - write_as_text_archive if include_text_archive - write_attachments if attachments.present? - write_manifest if include_manifest - write_metadata - end - - def write_pdf - write_tempfile(submission_pdf_filename, Base64.decode64(generate_pdf_content)) + def build_submission_archive(**) + SubmissionArchiveBuilder.new(**).run end def upload_temp_folder_to_s3 @@ -107,12 +92,6 @@ def download_folder_from_s3 end end - def generate_pdf_content - raise 'Missing PDF file to upload' unless file_path - - Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) - end - def fetch_submission_pdf path = "#{output_directory_path}/#{submission_pdf_filename}" s3_resource.bucket(target_bucket).object(path) @@ -126,65 +105,6 @@ def sign_s3_file_url(pdf) pdf.presigned_url(:get, expires_in: 30.minutes.to_i) end - def error_details(error) - "#{error.message}\n\n#{error.backtrace.join("\n")}" - end - - def write_as_json_archive - form_json = JSON.parse(submission.form_data) - write_tempfile('form_text_archive.json', JSON.pretty_generate(form_json)) - end - - def write_as_text_archive - form_text_archive = submission.form_data['claimDate'] ||= submission.created_at.iso8601 - write_tempfile('form_text_archive.txt', form_text_archive.to_json) - end - - def write_metadata - write_tempfile('metadata.json', metadata.to_json) - end - - def write_attachments - log_info("Processing #{attachments.count} attachments") - attachments.each_with_index { |upload, i| process_attachment(i + 1, upload) } - write_attachment_failure_report if attachment_failures.present? - rescue => e - handle_upload_error(e) - end - - def write_manifest - veteran_id = metadata['fileNumber'] - submission_datetime = submission.created_at - file_name = "submission_#{benefits_intake_uuid}_#{submission_datetime}_manifest.csv" - - "#{temp_directory_path}#{file_name}".tap do |file_path| - CSV.open(file_path, 'wb') do |csv| - csv << ['Veteran ID', 'Submission DateTime'] - csv << [veteran_id, submission_datetime] - end - end - end - - def write_tempfile(file_name, payload) - File.write("#{temp_directory_path}#{file_name}", payload) - end - - def process_attachment(attachment_number, guid) - log_info("Processing attachment ##{attachment_number}: #{guid}") - attachment = PersistentAttachment.find_by(guid:).to_pdf - raise 'Local record not found' unless attachment - - write_tempfile("attachment_#{attachment_number}.pdf", attachment) - rescue => e - attachment_failures << e - handle_error('Attachment failure.', e) - raise e - end - - def write_attachment_failure_report - write_tempfile('attachment_failures.txt', JSON.pretty_generate(attachment_failures)) - end - def save_file_to_s3(path, content) s3_resource.bucket(target_bucket).object(path).tap { |obj| obj.put(body: content) } end @@ -193,10 +113,6 @@ def output_directory_path @output_directory_path ||= "#{parent_dir}/#{benefits_intake_uuid}" end - def attachment_failures - @attachment_failures ||= [] - end - def temp_directory_path @temp_directory_path ||= Rails.root.join("tmp/#{benefits_intake_uuid}-#{SecureRandom.hex}/").to_s end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb index 5b24214dde4..84aa6f9c2da 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb @@ -6,7 +6,7 @@ module SimpleFormsApi module SharePoint class ArchiveUploader < Client # TODO: some/most of these parameters are unnecessary - def upload(form_contents:, form_submission:, station_id:, zip_file_path:) + def upload(benefits_intake_uuid:, zip_file_path:) upload_response = upload_payload(form_contents:, form_submission:, station_id:) list_item_id = fetch_list_item_id(upload_response) From 96059f6a4bc9fc12b5d7990f3eba3b53a597a272 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 14:54:36 -0400 Subject: [PATCH 32/38] more sharepoint service changes --- .../share_point/archive_uploader.rb | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb index 84aa6f9c2da..eec5bfbf4f0 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/archive_uploader.rb @@ -5,19 +5,21 @@ module SimpleFormsApi module SharePoint class ArchiveUploader < Client - # TODO: some/most of these parameters are unnecessary - def upload(benefits_intake_uuid:, zip_file_path:) - upload_response = upload_payload(form_contents:, form_submission:, station_id:) + def upload(benefits_intake_uuid:, file_path:) + @benefits_intake_uuid = benefits_intake_uuid + @file_path = file_path + + upload_response = upload_payload list_item_id = fetch_list_item_id(upload_response) - update_sharepoint_item(list_item_id:, form_submission:, station_id:) + update_sharepoint_item(list_item_id:, station_id:) rescue => e handle_upload_error(e) end private - def upload_payload(form_contents:, form_submission:, station_id:) + def upload_payload payload_path = generate_payload_path payload_name = build_payload_name @@ -54,7 +56,7 @@ def retrieve_list_item_id(uri) end end - def update_sharepoint_item(list_item_id:, form_submission:, station_id:) + def update_sharepoint_item(list_item_id:, station_id:) # TODO: this is a placeholder path and will need to be changed path = "#{base_path}/_api/Web/Lists/GetByTitle('Submissions')/items(#{list_item_id})" with_monitoring do @@ -62,19 +64,17 @@ def update_sharepoint_item(list_item_id:, form_submission:, station_id:) req.headers['Content-Type'] = 'application/json;odata=verbose' req.headers['X-HTTP-METHOD'] = 'MERGE' req.headers['If-Match'] = '*' - req.body = build_item_payload(form_submission, station_id).to_json + req.body = build_item_payload(station_id).to_json end end end - # TODO: this is a holdover from VHA logic and will need changed - def build_item_payload(form_submission, station_id) + # TODO: this is incomplete and needs to be finished + def build_item_payload(station_id) { '__metadata' => { 'type' => 'SP.Data.SubmissionsItem' }, 'StationId' => station_id, - 'UID' => form_submission.id - # 'SSN' => user[:ssn], - # 'Name1' => "#{user[:last_name]}, #{user[:first_name]}" + 'UID' => benefits_intake_uuid } end end From cb3696c1f0d9a9340c22cffcbb12e6885d80b175 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Wed, 11 Sep 2024 16:20:09 -0400 Subject: [PATCH 33/38] updates in accordance with remediation documentation --- .../s3/submission_archive_builder.rb | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb index b0cba53e113..0d1df1f0bc3 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb @@ -3,6 +3,8 @@ require 'csv' require 'fileutils' +# built in accordance with the following documentation: +# https://github.com/department-of-veterans-affairs/va.gov-team-sensitive/blob/master/platform/practices/zero-silent-failures/remediation.md module SimpleFormsApi module S3 class SubmissionArchiveBuilder < Utils @@ -112,20 +114,22 @@ def write_attachments end def write_manifest - veteran_id = metadata['fileNumber'] - submission_datetime = submission.created_at - file_name = "submission_#{benefits_intake_uuid}_#{submission_datetime}_manifest.csv" - - "#{temp_directory_path}#{file_name}".tap do |file_path| - CSV.open(file_path, 'wb') do |csv| - csv << ['Veteran ID', 'Submission DateTime'] - csv << [veteran_id, submission_datetime] - end + file_name = "submission_#{benefits_intake_uuid}_#{submission.created_at}_manifest.csv" + file_path = File.join(temp_directory_path, file_name) + + CSV.open(file_path, 'wb') do |csv| + csv << ['Submission DateTime', 'Form Type', 'VA.gov ID', 'Veteran ID', 'First Name', 'Last Name'] + csv << [ + submission.created_at, + submission.form_data['form_number'], + benefits_intake_uuid, + metadata['fileNumber'], + submission.form_data['first_name'], + submission.form_data['last_name'] + ] end - end - def write_tempfile(file_name, payload) - File.write("#{temp_directory_path}#{file_name}", payload) + file_path end def process_attachment(attachment_number, guid) @@ -144,6 +148,10 @@ def write_attachment_failure_report write_tempfile('attachment_failures.txt', JSON.pretty_generate(attachment_failures)) end + def write_tempfile(file_name, payload) + File.write("#{temp_directory_path}#{file_name}", payload) + end + def attachment_failures @attachment_failures ||= [] end From 4ffc33779c3a3b05dedd823bf1516ccd91e63543 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 12 Sep 2024 09:57:38 -0400 Subject: [PATCH 34/38] minor sharepoint tweaks, archive builder tests --- .../s3/submission_archive_builder.rb | 71 +++++++++---------- .../simple_forms_api/share_point/client.rb | 19 ++--- .../s3/submission_archive_builder_spec.rb | 44 ++++++++++++ 3 files changed, 89 insertions(+), 45 deletions(-) create mode 100644 modules/simple_forms_api/spec/services/s3/submission_archive_builder_spec.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb index 0d1df1f0bc3..c065eaa3bef 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archive_builder.rb @@ -8,16 +8,13 @@ module SimpleFormsApi module S3 class SubmissionArchiveBuilder < Utils - attr_reader :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, :include_text_archive, - :metadata, :parent_dir, :submission - def initialize(benefits_intake_uuid: nil, submission: nil, **options) # rubocop:disable Lint/MissingSuper defaults = default_options.merge(options) @submission = submission || FormSubmission.find_by(benefits_intake_uuid:) - raise 'Submission was not found' unless submission + raise 'Submission was not found' unless @submission - @benefits_intake_uuid = submission.benefits_intake_uuid + @benefits_intake_uuid = @submission.benefits_intake_uuid assign_instance_variables(defaults) end @@ -34,12 +31,15 @@ def run private + attr_reader :attachments, :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, + :include_text_archive, :metadata, :parent_dir, :submission + def default_options { attachments: [], # an array of attachment confirmation codes file_path: nil, # file path for the PDF file to be archived include_json_archive: true, # include the form data as a JSON object - include_manifest: true, # include a CSV file containing Veteran ID & original submission datetime + include_manifest: true, # include a CSV file containing manifest data include_text_archive: true, # include the form data as a text file metadata: {}, # pertinent metadata for original file upload/submission parent_dir: 'vff-simple-forms' # S3 bucket base directory where files live @@ -50,26 +50,21 @@ def process_submission_files write_pdf write_as_json_archive if include_json_archive write_as_text_archive if include_text_archive - write_attachments if attachments.present? + write_attachments unless attachments.empty? write_manifest if include_manifest write_metadata end def write_pdf - write_tempfile(submission_pdf_filename, Base64.decode64(generate_pdf_content)) + write_tempfile(submission_pdf_filename, File.read(generate_pdf_content)) end + # TODO: this will be pulled out to be more team agnostic def generate_pdf_content - regenerate_pdf_submission unless file_path - - Faraday::UploadIO.new(file_path, Mime[:pdf].to_s, File.basename(file_path)) - end + return file_path if file_path - # TODO: this will be pulled out to be more team agnostic - def regenerate_pdf_submission form_number = SimpleFormsApi::V1::UploadsController::FORM_NUMBER_MAP[submission.form_type] - parsed_form_data = JSON.parse(submission.form_data) - form = "SimpleFormsApi::#{form_number.titleize.gsub(' ', '')}".constantize.new(parsed_form_data) + form = "SimpleFormsApi::#{form_number.titleize.gsub(' ', '')}".constantize.new(form_data_hash) filler = SimpleFormsApi::PdfFiller.new(form_number:, form:) @file_path = filler.generate(timestamp: submission.created_at) @@ -81,10 +76,15 @@ def regenerate_pdf_submission form.handle_attachments(file_path) if %w[vba_40_0247 vba_20_10207 vba_40_10007].include? form_number @attachments = form.get_attachments if form_number == 'vba_20_10207' + @file_path + end + + def form_data_hash + @form_data_hash ||= JSON.parse(submission.form_data) end def submission_pdf_filename - @submission_pdf_filename ||= "form_#{submission.form_data['form_number']}.pdf" + @submission_pdf_filename ||= "form_#{form_data_hash['form_number']}.pdf" end def error_details(error) @@ -92,13 +92,12 @@ def error_details(error) end def write_as_json_archive - form_json = JSON.parse(submission.form_data) - write_tempfile('form_text_archive.json', JSON.pretty_generate(form_json)) + write_tempfile('form_json_archive.json', JSON.pretty_generate(form_data_hash)) end def write_as_text_archive - form_text_archive = submission.form_data['claimDate'] ||= submission.created_at.iso8601 - write_tempfile('form_text_archive.txt', form_text_archive.to_json) + form_data_hash['claim_date'] ||= submission.created_at.iso8601 + write_tempfile('form_text_archive.txt', form_data_hash.to_s) end def write_metadata @@ -113,6 +112,18 @@ def write_attachments handle_upload_error(e) end + def process_attachment(attachment_number, guid) + log_info("Processing attachment ##{attachment_number}: #{guid}") + attachment = PersistentAttachment.find_by(guid:).to_pdf + raise 'Local record not found' unless attachment + + write_tempfile("attachment_#{attachment_number}.pdf", attachment) + rescue => e + attachment_failures << e + handle_error('Attachment failure.', e) + raise e + end + def write_manifest file_name = "submission_#{benefits_intake_uuid}_#{submission.created_at}_manifest.csv" file_path = File.join(temp_directory_path, file_name) @@ -121,29 +132,17 @@ def write_manifest csv << ['Submission DateTime', 'Form Type', 'VA.gov ID', 'Veteran ID', 'First Name', 'Last Name'] csv << [ submission.created_at, - submission.form_data['form_number'], + form_data_hash['form_number'], benefits_intake_uuid, metadata['fileNumber'], - submission.form_data['first_name'], - submission.form_data['last_name'] + metadata['veteranFirstName'], + metadata['veteranLastName'] ] end file_path end - def process_attachment(attachment_number, guid) - log_info("Processing attachment ##{attachment_number}: #{guid}") - attachment = PersistentAttachment.find_by(guid:).to_pdf - raise 'Local record not found' unless attachment - - write_tempfile("attachment_#{attachment_number}.pdf", attachment) - rescue => e - attachment_failures << e - handle_error('Attachment failure.', e) - raise e - end - def write_attachment_failure_report write_tempfile('attachment_failures.txt', JSON.pretty_generate(attachment_failures)) end diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb index e79d8801f21..c9d1bdbb4c8 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/client.rb @@ -10,13 +10,13 @@ class Client class ListItemNotFound < StandardError; end - # TODO: this is a placeholder; add configuration for OFO/VBA sharepoint access - STATSD_KEY_PREFIX = 'api.ofo.submission_error_remediation.sharepoint.request' + # TODO: this is a placeholder; add configuration for VBA sharepoint access + STATSD_KEY_PREFIX = 'api.vba.submission_error_remediation.sharepoint.request' attr_reader :settings attr_accessor :access_token - # TODO: these are placeholders; add configuration for OFO/VBA sharepoint access + # TODO: these are placeholders; add configuration for VBA sharepoint access def_delegators :settings, :authentication_url, :base_path, :client_id, :client_secret, :resource, :service_name, :sharepoint_url, :tenant_id @@ -44,16 +44,17 @@ def auth_params # TODO: change this to interface with S3 or an intermediary job/service def upload_to_sharepoint(payload_path, payload_name) with_monitoring do - sharepoint_file_connection.post(file_transfer_url(payload_name)) do |req| + sharepoint_file_connection.post(file_creation_url(payload_name)) do |req| req.headers['Content-Type'] = 'octet/stream' req.body = Faraday::UploadIO.new(File.open(payload_path), 'octet/stream') end end end - # TODO: confirm this is the correct payload url - def file_transfer_url(payload_name) - "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('#{base_path}/Submissions')/" \ + # TODO: this url may need tweaking + # reference: https://learn.microsoft.com/en-us/sharepoint/dev/sp-add-ins/working-with-folders-and-files-with-rest + def file_creation_url(payload_name) + "#{base_path}/_api/Web/GetFolderByServerRelativeUrl('Benefits Portfolio/Documents/VBA Manual Form Upload')/" \ "Files/add(url='#{payload_name}.zip',overwrite=true)" end @@ -108,9 +109,9 @@ def sharepoint_headers } end - # TODO: this is a placeholder; add configuration for OFO/VBA sharepoint access + # TODO: this is a placeholder; add configuration for VBA sharepoint access def initialize_settings - Settings.ofo.sharepoint + Settings.vba.sharepoint end end end diff --git a/modules/simple_forms_api/spec/services/s3/submission_archive_builder_spec.rb b/modules/simple_forms_api/spec/services/s3/submission_archive_builder_spec.rb new file mode 100644 index 00000000000..1a76f8232da --- /dev/null +++ b/modules/simple_forms_api/spec/services/s3/submission_archive_builder_spec.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +require 'rails_helper' +require SimpleFormsApi::Engine.root.join('spec', 'spec_helper.rb') + +RSpec.describe SimpleFormsApi::S3::SubmissionArchiveBuilder do + let(:form_id) { '21-10210' } + let(:form_data) { File.read("modules/simple_forms_api/spec/fixtures/form_json/vba_#{form_id.gsub('-', '_')}.json") } + let(:submission) { create(:form_submission, :pending, form_type: form_id, form_data:) } + let(:benefits_intake_uuid) { submission.benefits_intake_uuid } + let(:archive_builder_instance) { described_class.new(benefits_intake_uuid:) } + + before do + allow(FormSubmission).to receive(:find_by).and_return(submission) + allow_any_instance_of(described_class).to receive(:assign_instance_variables).and_call_original + allow(SecureRandom).to receive(:hex).and_return('random-letters-n-numbers') + end + + describe '#initialize' do + subject(:new) { archive_builder_instance } + + let(:defaults) do + { + attachments: [], + file_path: nil, + include_json_archive: true, + include_manifest: true, + include_text_archive: true, + metadata: {}, + parent_dir: 'vff-simple-forms' + } + end + + it { is_expected.to have_received(:assign_instance_variables).with(defaults) } # rubocop:disable RSpec/SubjectStub + end + + describe '#run' do + subject(:run) { archive_builder_instance.run } + + it 'completes successfully' do + expect(run).to eq(Rails.root.join("tmp/#{benefits_intake_uuid}-random-letters-n-numbers/").to_s) + end + end +end From d00cbcfa3195d88006a39dd0cd3117a3551fcbe8 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 12 Sep 2024 10:53:12 -0400 Subject: [PATCH 35/38] add job to circumvent S3 if necessary --- .../jobs/build_archive_and_upload_job.rb | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 modules/simple_forms_api/app/services/simple_forms_api/share_point/jobs/build_archive_and_upload_job.rb diff --git a/modules/simple_forms_api/app/services/simple_forms_api/share_point/jobs/build_archive_and_upload_job.rb b/modules/simple_forms_api/app/services/simple_forms_api/share_point/jobs/build_archive_and_upload_job.rb new file mode 100644 index 00000000000..b363f63d121 --- /dev/null +++ b/modules/simple_forms_api/app/services/simple_forms_api/share_point/jobs/build_archive_and_upload_job.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +require 'zip' + +module SimpleFormsApi + module SharePoint + module Jobs + class BuildArchiveAndUploadJob < SimpleFormsApi::S3::Utils + include Sidekiq::Worker + + sidekiq_options retry: 3, queue: 'default' + + def perform(benefits_intake_uuid:) + @benefits_intake_uuid = benefits_intake_uuid + + temp_directory_path = build_submission_archive + zip_temp_folder(temp_directory_path) + upload_folder_to_sharepoint(temp_directory_path) + + FileUtils.rm_rf(temp_directory_path) + rescue => e + handle_error('BuildArchiveAndUploadJob failed.', e) + end + + private + + attr_reader :benefits_intake_uuid + + def zip_temp_folder(temp_directory_path) + Zip::File.open(temp_directory_path, Zip::File::CREATE) do |zip_file| + Dir[File.join(temp_directory_path, '**', '**')].each do |file| + zip_file.add(file.sub("#{temp_directory_path}/", ''), file) + end + end + end + + def build_submission_archive + SubmissionArchiveBuilder.new(benefits_intake_uuid:).run + end + + def upload_folder_to_sharepoint(zip_file_path) + SimpleFormsApi::SharePoint::ArchiveUploader.upload(benefits_intake_uuid:, zip_file_path:) + end + end + end + end +end From 95455f7437bb9b1df05e09504a8806eeb55afc97 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 12 Sep 2024 10:53:52 -0400 Subject: [PATCH 36/38] add missing library to utils --- .../simple_forms_api/app/services/simple_forms_api/s3/utils.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb index 3676a1e5dd9..f9c0e061708 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/utils.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'reports/uploader' + module SimpleFormsApi module S3 class Utils From c8fde242fa65f502c51442ea5d5188f3d7fe5337 Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 12 Sep 2024 10:55:12 -0400 Subject: [PATCH 37/38] update test coverage for submission archiver --- .../s3/submission_archiver.rb | 8 +- .../services/s3/submission_archiver_spec.rb | 147 ++++-------------- 2 files changed, 38 insertions(+), 117 deletions(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index 2674054ce07..96d68deda9c 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -6,9 +6,6 @@ module SimpleFormsApi module S3 class SubmissionArchiver < Utils - attr_reader :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, :include_text_archive, - :metadata, :parent_dir, :submission - class << self def fetch_presigned_url(benefits_intake_uuid) pdf = fetch_pdf(benefits_intake_uuid) @@ -47,11 +44,14 @@ def run output_directory_path rescue => e - handle_error("Failed submission: #{submission.id}", e, { submission_id: submission.id, benefits_intake_uuid: }) + handle_error("Failed submission: #{benefits_intake_uuid}", e, { benefits_intake_uuid: }) end private + attr_reader :benefits_intake_uuid, :file_path, :include_json_archive, :include_manifest, :include_text_archive, + :metadata, :parent_dir, :submission + def default_options { attachments: [], # an array of attachment confirmation codes diff --git a/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb b/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb index e4e4d3e07e9..347b8ac09c1 100644 --- a/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb +++ b/modules/simple_forms_api/spec/services/s3/submission_archiver_spec.rb @@ -3,140 +3,61 @@ require 'rails_helper' require SimpleFormsApi::Engine.root.join('spec', 'spec_helper.rb') -RSpec.describe SimpleFormsApi::S3::SubmissionArchiver, type: :model do - let(:submission_id) { 1 } +# rubocop:disable RSpec/SubjectStub +RSpec.describe SimpleFormsApi::S3::SubmissionArchiver do let(:form_id) { '21-10210' } - let(:form_data) { File.read('modules/simple_forms_api/spec/fixtures/form_json/vba_21_10210.json') } + let(:form_data) { File.read("modules/simple_forms_api/spec/fixtures/form_json/vba_#{form_id.gsub('-', '_')}.json") } let(:submission) { create(:form_submission, :pending, form_type: form_id, form_data:) } + let(:benefits_intake_uuid) { submission.benefits_intake_uuid } let(:options) do { + attachments: [], + file_path: nil, include_json_archive: true, + include_manifest: true, include_text_archive: true, - parent_dir: 'test-dir', - quiet_pdf_failures: true, - quiet_upload_failures: true, - run_quiet: true + metadata: {}, + parent_dir: 'test-dir' } end - let(:archive_submission) { described_class.new(submission_id:, **options) } + let(:archive_submission_instance) { described_class.new(benefits_intake_uuid:, **options) } + let(:temp_directory_path) { Rails.root.join("tmp/#{benefits_intake_uuid}-random-letters-n-numbers/").to_s } before do - allow(FormSubmission).to receive(:find).and_return(submission) + allow(FormSubmission).to receive(:find_by).and_return(submission) + allow(SecureRandom).to receive(:hex).and_return('random-letters-n-numbers') + allow_any_instance_of(described_class).to receive(:assign_instance_variables).and_call_original + allow_any_instance_of(described_class).to receive(:build_submission_archive).and_call_original + allow_any_instance_of(described_class).to receive(:log_info).and_call_original end describe '#initialize' do - it 'sets default values for instance variables' do - expect(archive_submission.submission).to eq(submission) - expect(archive_submission.parent_dir).to eq('test-dir') - expect(archive_submission.include_json_archive).to be(true) - expect(archive_submission.include_text_archive).to be(true) - expect(archive_submission.quiet_pdf_failures).to be(true) - expect(archive_submission.quiet_upload_failures).to be(true) - end - end - - describe '#run' do - before do - allow(archive_submission).to receive(:process_submission_files) - allow(archive_submission).to receive(:output_directory_path).and_return('/some/path') - allow(archive_submission).to receive(:log_info) - end + subject(:new_instance) { archive_submission_instance } - it 'logs the processing of the submission and calls process_submission_files' do - expect(archive_submission).to receive(:log_info).with("Processing submission ID: #{submission.id}") - expect(archive_submission).to receive(:process_submission_files) - archive_submission.run + let(:archive_submission_instance) { described_class.new(benefits_intake_uuid:) } + let(:defaults) do + { + attachments: [], + file_path: nil, + include_json_archive: true, + include_manifest: true, + include_text_archive: true, + metadata: {}, + parent_dir: 'vff-simple-forms' + } end - context 'when an error occurs' do - before do - allow(archive_submission).to receive(:process_submission_files).and_raise(StandardError, 'Processing error') - end - - xit 'handles errors and logs them' do - expect(archive_submission).to( - receive(:handle_error).with( - "Failed submission: #{submission.id}", - instance_of(StandardError), submission_id: submission.id - ) - ) - expect { archive_submission.run }.not_to raise_error - end - end + it { is_expected.to have_received(:assign_instance_variables).with(defaults) } + it { is_expected.to have_received(:build_submission_archive) } end - describe '#write_pdf' do - before do - allow(archive_submission).to receive(:generate_pdf_content).and_return(Base64.encode64('pdf content')) - allow(archive_submission).to receive(:save_file_to_s3) - end + describe '#run' do + subject(:run) { described_class.new(benefits_intake_uuid:).run } - xit 'writes the PDF to S3' do - expect(archive_submission).to receive(:save_file_to_s3).with(/form.pdf/, 'pdf content') - archive_submission.run - end + xit { is_expected.to have_received(:log_info).with("Processing submission: #{benefits_intake_uuid}") } context 'when an error occurs' do - before do - allow(archive_submission).to receive(:generate_pdf_content).and_raise(StandardError, 'PDF generation error') - end - - it 'handles pdf generation errors based on quiet_pdf_failures' do - expect(archive_submission).to receive(:write_pdf_error).with(instance_of(StandardError)) - expect { archive_submission.run }.not_to raise_error - end - end - end - - describe '#write_as_json_archive' do - before do - allow(archive_submission).to receive(:save_file_to_s3) - allow(archive_submission).to receive(:form_json).and_return({ key: 'value' }) - end - - it 'writes the JSON archive to S3' do - expect(archive_submission).to receive(:save_file_to_s3).with(/form_text_archive.json/, - JSON.pretty_generate({ key: 'value' })) - archive_submission.run - end - end - - describe '#write_as_text_archive' do - before do - allow(archive_submission).to receive(:save_file_to_s3) - allow(archive_submission).to receive(:form_text_archive).and_return({ key: 'value' }) - end - - it 'writes the text archive to S3' do - expect(archive_submission).to receive(:save_file_to_s3).with(/form_text_archive.txt/, { key: 'value' }.to_json) - archive_submission.run - end - end - - describe '#write_metadata' do - before do - allow(archive_submission).to receive(:save_file_to_s3) - allow(archive_submission).to receive(:metadata).and_return({ key: 'value' }) - end - - xit 'writes metadata to S3' do - expect(archive_submission).to receive(:save_file_to_s3).with(/metadata.json/, { key: 'value' }.to_json) - archive_submission.run - end - end - - describe '#handle_error' do - before do - allow(archive_submission).to receive(:process_submission_files).and_return(error) - end - - let(:error) { StandardError.new('some error') } - - xit 'logs the error and re-raises it' do - expect(archive_submission).to receive(:log_error).with( - "Failed submission: #{submission.id}", error, submission_id: submission.id - ) - expect { archive_submission.run }.to raise_error(error) end end end +# rubocop:enable RSpec/SubjectStub From 929380e7893c0235a0d5f8c504b8abc8695bc2fe Mon Sep 17 00:00:00 2001 From: Jacob Penner Date: Thu, 12 Sep 2024 14:15:04 -0400 Subject: [PATCH 38/38] fix random thing --- .../app/services/simple_forms_api/s3/submission_archiver.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb index 96d68deda9c..bb3c169213f 100644 --- a/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb +++ b/modules/simple_forms_api/app/services/simple_forms_api/s3/submission_archiver.rb @@ -98,7 +98,7 @@ def fetch_submission_pdf end def submission_pdf_filename - @submission_pdf_filename ||= "form_#{submission.form_data['form_number']}.pdf" + @submission_pdf_filename ||= "form_#{JSON.parse(submission.form_data)['form_number']}.pdf" end def sign_s3_file_url(pdf)