Skip to content

Commit

Permalink
Remove log of record fixes
Browse files Browse the repository at this point in the history
- As far as I can tell, we never use these, and it complicates breaking the updates into separate background jobs
- rubocop
  • Loading branch information
maxkadel committed Jan 2, 2025
1 parent ab20776 commit 6afb495
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 50 deletions.
1 change: 0 additions & 1 deletion app/models/scsb/partner_updates/full.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ def process_full_files
download_and_process_full(inst: 'CUL', prefix: 'scsbfull_cul_')
download_and_process_full(inst: 'HL', prefix: 'scsbfull_hl_')
set_generated_date
log_record_fixes
end

def download_and_process_full(inst:, prefix:)
Expand Down
1 change: 0 additions & 1 deletion app/models/scsb/partner_updates/incremental.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ def process_incremental_files
update_files = download_partner_updates
process_partner_updates(files: update_files)
set_generated_date
log_record_fixes
delete_files = download_partner_deletes
process_partner_deletes(files: delete_files)
end
Expand Down
47 changes: 5 additions & 42 deletions app/models/scsb/partner_updates/update.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,6 @@ def initialize(dump:, dump_file_type:, timestamp:)
@s3_bucket = Scsb::S3Bucket.partner_transfer_client
@scsb_file_dir = ENV['SCSB_FILE_DIR']
@update_directory = ENV['SCSB_PARTNER_UPDATE_DIRECTORY'] || '/tmp/updates'
@inv_xml = []
@tab_newline = []
@leader = []
@composed_chars = []
@bad_utf8 = []
end

def attach_dump_file(filepath, dump_file_type: nil)
Expand Down Expand Up @@ -59,26 +54,11 @@ def process_partner_updates(files:, file_prefix: 'scsb_update_')
def process_record(record)
record = field_delete(['856', '959'], record)
record.leader[5] = 'c' if record.leader[5].eql?('d')
if bad_utf8?(record)
@bad_utf8 << record['001']
record = bad_utf8_fix(record)
end
if invalid_xml_chars?(record)
@inv_xml << record['001']
record = invalid_xml_fix(record)
end
if tab_newline_char?(record)
@tab_newline << record['001']
record = tab_newline_fix(record)
end
if leader_errors?(record)
@leader << record['001']
record = leaderfix(record)
end
if composed_chars_errors?(record)
@composed_chars << record['001']
record = composed_chars_normalize(record)
end
record = bad_utf8_fix(record) if bad_utf8?(record)
record = invalid_xml_fix(record) if invalid_xml_chars?(record)
record = tab_newline_fix(record) if tab_newline_char?(record)
record = leaderfix(record) if leader_errors?(record)
record = composed_chars_normalize(record) if composed_chars_errors?(record)
record = extra_space_fix(record)
empty_subfield_fix(record)
end
Expand All @@ -90,23 +70,6 @@ def add_error(message:)
@dump.event.save
end

def log_record_fixes
log_file = {
inv_xml: @inv_xml,
tab_newline: @tab_newline,
leader: @leader,
composed_chars: @composed_chars,
bad_utf8: @bad_utf8
}
filepath = log_file_name
File.write(filepath, log_file.to_json.to_s)
attach_dump_file(filepath, dump_file_type: :log_file)
end

def log_file_name
"#{@scsb_file_dir}/fixes_#{@last_dump.to_time.strftime('%Y_%m_%d')}.json"
end

def set_generated_date
@dump.generated_date = date_strings.map { |str| DateTime.parse(str) }.sort.first
end
Expand Down
4 changes: 1 addition & 3 deletions spec/models/scsb/partner_updates/full_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,10 @@

# attaches marcxml and csv files
expect(dump.dump_files.where(dump_file_type: :recap_records_full).length).to eq(2)
expect(dump.dump_files.where(dump_file_type: :log_file).length).to eq(1)
expect(dump.dump_files.map(&:path)).to contain_exactly(
File.join(scsb_file_dir, 'scsbfull_nypl_20210430_015000_1.xml.gz'),
File.join(scsb_file_dir, 'scsbfull_nypl_20210430_015000_2.xml.gz'),
File.join(scsb_file_dir, 'ExportDataDump_Full_NYPL_20210430_015000.csv.gz'),
a_string_matching(/#{scsb_file_dir}\/fixes_\d{4}_\d{2}_\d{2}.json.gz/)
File.join(scsb_file_dir, 'ExportDataDump_Full_NYPL_20210430_015000.csv.gz')
)
expect(dump.event.error).to eq 'No metadata files found matching CUL; No metadata files found matching HL'
# cleans up
Expand Down
4 changes: 1 addition & 3 deletions spec/models/scsb/partner_updates/incremental_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@

# attaches marcxml files
expect(dump.dump_files.where(dump_file_type: :recap_records).length).to eq(2)
expect(dump.dump_files.where(dump_file_type: :log_file).length).to eq(1)
expect(dump.dump_files.map(&:path)).to contain_exactly(
File.join(scsb_file_dir, 'scsb_update_20210622_183200_1.xml.gz'),
File.join(scsb_file_dir, 'scsb_update_20210622_183200_2.xml.gz'),
a_string_matching(/#{scsb_file_dir}\/fixes_\d{4}_\d{2}_\d{2}.json.gz/)
File.join(scsb_file_dir, 'scsb_update_20210622_183200_2.xml.gz')
)

expect(dump.generated_date).to eq DateTime.parse('2021-06-22')
Expand Down

0 comments on commit 6afb495

Please sign in to comment.