Skip to content

Commit

Permalink
first stab at an ex-libris holdings xml parser
Browse files Browse the repository at this point in the history
puts header elsewhere

using ruby-marc instead of plain nokogiri
  • Loading branch information
mwarin committed Apr 26, 2023
1 parent 60c58e3 commit 31bf319
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 0 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ gem "zinzout"
gem "puma"
gem "sidekiq", "~> 6.0"
gem "sidekiq-batch"
gem "marc"

group :development, :test do
gem "pry"
Expand Down
97 changes: 97 additions & 0 deletions bin/ex_libris_holdings_xml_parser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
require "marc"

# Reads MARC-XML holdings files and hands each record to the caller wrapped
# in an HTRecord.
class ExLibrisHoldingsXmlParser
  # Iterates over every MARC record in every given file.
  #
  # files - a collection of inputs, each passed as-is to MARC::XMLReader.new
  #         (the command-line entry point passes ARGV, i.e. file paths).
  #
  # Yields one HTRecord per MARC record, in file order.
  # Returns an Enumerator over the same HTRecords when no block is given
  # (previously a block-less call raised LocalJumpError on the first record);
  # with a block, returns whatever files.each returns.
  def main(files)
    return enum_for(:main, files) unless block_given?

    files.each do |file|
      MARC::XMLReader.new(file).each do |marc_record|
        # TODO: filter out unwanted records here (next if ...).
        yield HTRecord.new(marc_record)
      end
    end
  end
end

# Presents one MARC record (as produced by the ruby-marc readers) as a
# holdings row: a fixed set of columns that can be serialized to a TSV line.
class HTRecord
  # Output columns, in order. Each name is also the reader method that
  # produces the value, so header_tsv and to_tsv cannot drift apart.
  COLUMNS = %w[item_type oclc local_id status condition enum_chron issn govdoc].freeze

  # TODO: figure out mapping for item_type.
  # see https://wiki.harvard.edu/confluence/display/LibraryStaffDoc/Item+Material+Types
  ITEM_TYPE_MAP = {
    "ISSUE" => "ser",
    "DVD" => "skip",
    "BOOK" => "mon"
  }.freeze
  DEFAULT_ITEM_TYPE = "mix"

  # TODO: figure out mapping for status.
  STATUS_MAP = {
    "MISSING" => "LM",
    "LOST_LOAN" => "LM"
  }.freeze
  DEFAULT_STATUS = "CH"

  # ITM subfields that are combined, in this order, into enum_chron.
  ENUM_CHRON_SUBFIELDS = %w[a b i j].freeze

  # marc_record - a MARC::Record, or anything that answers record[tag][code]
  #               the same way.
  def initialize(marc_record)
    @marc_record = marc_record # MARC::Record
  end

  # Returns the tab-separated header line matching to_tsv.
  def self.header_tsv
    COLUMNS.join("\t")
  end

  # Returns this record as one tab-separated line, columns ordered as in
  # COLUMNS. Values that are nil come out as empty columns.
  def to_tsv
    COLUMNS.map { |column| public_send(column) }.join("\t")
  end

  # Returns subfield a of the first 035 field, or nil if there is none.
  def oclc
    @oclc ||= subfield("035", "a")
  end

  # Returns the value of control field 001, or nil if the record has none.
  # The field's value is used rather than the field object itself, because
  # the field object stringifies with its tag in front of the value.
  def local_id
    @local_id ||= @marc_record["001"]&.value
  end

  # Returns ITM subfield c, or nil if absent.
  def condition
    @condition ||= subfield("ITM", "c")
  end

  # Returns the mapped item type for ITM subfield m (see map_item_type).
  def item_type
    @item_type ||= map_item_type(subfield("ITM", "m"))
  end

  # TODO: Figure out the structure of enum_chrons.
  # Returns the non-empty values of ITM subfields a, b, i and j joined by
  # commas; an empty string when none of them are present.
  def enum_chron
    @enum_chron ||= ENUM_CHRON_SUBFIELDS
      .map { |code| subfield("ITM", code) }
      .reject { |value| value.nil? || value.empty? }
      .join(",")
  end

  # Maps a raw item type to its output code; anything not listed in
  # ITEM_TYPE_MAP (including nil) maps to "mix".
  def map_item_type(item_type)
    ITEM_TYPE_MAP.fetch(item_type, DEFAULT_ITEM_TYPE)
  end

  # TODO: figure out status.
  # Returns the mapped status for ITM subfield k (see map_status).
  def status
    @status ||= map_status(subfield("ITM", "k"))
  end

  # Maps a raw status to its output code; anything not listed in STATUS_MAP
  # (including nil) maps to "CH".
  def map_status(status)
    STATUS_MAP.fetch(status, DEFAULT_STATUS)
  end

  # TODO: figure out issn.
  def issn
    "dunno"
  end

  # TODO: figure out govdocness.
  def govdoc
    "dunno"
  end

  private

  # Looks up a subfield value without raising when the whole field is
  # missing from the record; returns nil in that case.
  def subfield(tag, code)
    field = @marc_record[tag]
    field && field[code]
  end
end

# When run directly (not required as a library): treat every command-line
# argument as an input file and print a TSV header followed by one line per
# record to stdout.
if __FILE__ == $PROGRAM_NAME
  $stdout.puts HTRecord.header_tsv
  parser = ExLibrisHoldingsXmlParser.new
  parser.main(ARGV) { |ht_record| $stdout.puts ht_record.to_tsv }
end

0 comments on commit 31bf319

Please sign in to comment.