diff --git a/Gemfile b/Gemfile index f967d1e0..b34d1175 100644 --- a/Gemfile +++ b/Gemfile @@ -1,12 +1,7 @@ source 'https://rubygems.org' -gem 'arel' gem 'atomic' -gem 'builder' gem 'bundler' -gem 'coffee-script' -gem 'coffee-script-source' -gem 'erubis' gem 'execjs' gem 'jsmin' gem 'json' @@ -23,12 +18,6 @@ gem 'rack-test' gem 'rdoc' gem 'rspec' gem 'rubyzip' -gem 'sass' -gem 'sdoc' gem 'sinatra' gem 'sinatra-assetpack' -gem 'thread_safe' -gem 'tilt' -gem 'tzinfo' -gem 'uglifier' gem 'zip' \ No newline at end of file diff --git a/README.md b/README.md index 57925624..40d841ee 100644 --- a/README.md +++ b/README.md @@ -66,16 +66,20 @@ To change, for example, the version of the ArchivesSpace target, add the followi line Appdata.aspace_version 'v1.0.1' - -# Notes -A typical migration can take several hours and will cause ArchivesSpace's -indexer to get backed up. Migrated records may not appear right away in browse or search results in ArchivesSpace. Consider running ArchivesSpace with the indexer -turned off to speed up the migration process. +If Archon response times become slow due to network latency or large datasets, it is +possible to speed up successive tests by turning on database caching. Note that you must manually delete +the database if you point the migration tool at a new Archon instance. + + Appdata.use_dbcache true -A large migration may fail because of an expiration of the migration tool's session in ArchivesSpace. Avoid this by setting a 10 hour session expiration threshold in the ArchivesSpace configuration file: +*Note: this feature is not complete and should be left off by default. - AppConfig[:session_expire_after_seconds] = 36000 +# Notes + +A typical migration can take several hours and could cause ArchivesSpace's +indexer to get backed up. Migrated records may not appear right away in browse or search results in ArchivesSpace. 
Consider running ArchivesSpace with the indexer +turned off to speed up the migration process, or upgrading to a later version of ArchivesSpace. Do not run a migration process against an ArchivesSpace instance that already contains data. diff --git a/TECHNICAL_OVERVIEW.md b/TECHNICAL_OVERVIEW.md new file mode 100644 index 00000000..5013f7ca --- /dev/null +++ b/TECHNICAL_OVERVIEW.md @@ -0,0 +1,51 @@ +Archon2ArchivesSpace TECHNICAL OVERVIEW +================ +# Application + +The file at app/main.rb invokes a web application built on the Sinatra framework (http://www.sinatrarb.com/). + +The application root ('/') responds to HTTP GET requests with a simple form in +which a user enters credentials for an Archon instance and an ArchivesSpace instance +and clicks a button. The resulting POST request initiates an instance of the +MigrationJob class. While the job is running, its output is yielded to the client's +browser as a JSON stream. + +# Clients + +The application contains a client class for both Archon and ArchivesSpace. Clients +handle the basic HTTP requests that are needed to read data from Archon and post +it to ArchivesSpace. + +The ArchivesSpace client relies on some libraries that are extracted from the +ArchivesSpace source code. See the README document for instructions for updating +these files to match the ArchivesSpace release being targeted. + +# MigrationJob + +This class is the controller for a single migration from point A (Archon) to point B +(ArchivesSpace). It moves through the various Archon record types, reading the +records provided by the Archon client, transforming them, and either sending them +directly to ArchivesSpace or pushing them into a record batch that the ArchivesSpace +client posts in a single request. + +# Archon Models + +Archon records are represented by model classes defined in app/models. 
Most model +classes implement a 'transform' method which initializes a new object representing +a corresponding ArchivesSpace data structure. The new object is then fleshed out +with data and yielded (in most cases) to the block passed to the transform method. + +Since not all Archon records have a 1 to 1 relationship to the ArchivesSpace data +model, there are several models that yield more than 1 object, or that function +in an idiosyncratic way. + +The base class for Archon models is defined in the Archon client library. The base +class contains two types of caches to facilitate the reading of data via the +Archon API. One cache contains raw HTTP response body data from Archon. The other +cache contains instances of the ArchonRecord subclasses. A third, still experimental +cache saves Archon response data to an SQLite database, to facilitate repeated +tests against the same Archon instance. + +The Archon API only provides a paginated listing of records, so ArchonRecord.find is +implemented by reading the entire set until the desired record is found. Hence the +necessity for the caching techniques described above. 
diff --git a/app/css/main.css b/app/css/main.css index e4aa4326..f72cc888 100644 --- a/app/css/main.css +++ b/app/css/main.css @@ -8,7 +8,16 @@ background: #ddd; } -p { +#status-console div.main { + font-size: 1.1em; +} + +#status-console div.collapsed div.updates { + visibility: hidden; + height: 0px; +} + +p.update, p.error, p.warn { padding-left: 10px; padding-right: 10px; } diff --git a/app/js/main.js b/app/js/main.js index fb13e08e..4b247ea9 100644 --- a/app/js/main.js +++ b/app/js/main.js @@ -34,7 +34,7 @@ $(document).ready(function(){ $("#nodourl").click(function(){ // If checked if ($("#nodourl").is(":checked")) { - //show the hidden div + //show the hidden div $("#do_baseurl").removeAttr('required'); } else { //otherwise, hide it @@ -46,57 +46,100 @@ $(document).ready(function(){ function updateStatus(update, emitter){ -// console.log(update); + console.log(update); if (update.type == 'error') { emitter.show_error(update.body); } else if (update.type == 'status') { - emitter.refresh_status(update.body, update.source); + emitter.add_status(update.body); } else if (update.type == 'warning') { - emitter.show_warning(update.body); + emitter.show_error(update.body); + } else if (update.type == 'update') { + emitter.show_update(update.body, update.source); + } else if (update.type == 'flash') { + emitter.flash(update.body, update.source); } else if (update.type == 'progress') { emitter.show_progress(update.ticks, update.total); - } else if (update.type == 'update') { + } else if (update.type == 'progress_message') { emitter.show_progress_message(update.body); } else if (update.type == 'log') { $('#download-log').attr('href', update.file); - } else { - // todo: toggle in progress bar } } function StatusEmitter() { - var console = $('#status-console'); - - this.refresh_status = function(status, source){ - if (source == 'aspace') { - $("#status-console div:last p.aspace").html(status); - } else { - $("#status-console div:last p.aspace").remove(); - 
$("#status-console div:last span.progress-message").html(" - Done"); - console.append("

"+status+"

"); + var statusBox = $('#status-console'); + + this.last_status = function() { + return statusBox.children('div.status:last'); + } + + this.add_status = function(status) { + last_status = this.last_status(); + console.log(last_status); + if (last_status.length) { + last_status.addClass("collapsed"); + last_status.children('div.updates').children('p:last').children('span.progress').remove(); + last_status.children('div.updates').children('p.flash').remove(); } + statusBox.append("
"+status+" (+/-)
"); + + last_status = this.last_status(); + toggler = last_status.children('div.main').children('a.toggleUpdates'); + + toggler.on('click', function(e) { + $(this).parent().parent().toggleClass('collapsed'); + }); } - this.show_error = function(error){ - console.addClass('error'); - console.append("

"+error+"

"); + this.show_error = function(body){ + last_status = this.last_status(); + if (!last_status.length) { + this.add_status('Migration Errors'); + last_status = this.last_status(); + } + + html = "

"+body+"

"; + last_status.children('div.updates').append(html); } - this.show_warning = function(warning){ - console.append("

" + warning + "

"); + this.show_update = function(body, source){ + source = typeof source !== 'undefined' ? source : 'migration'; + last_status = this.last_status(); + last_status.children('div.updates').children('p:last').children('span.progress').remove(); + + last_status.children('div.updates').children('p.flash').remove(); + + html = "

" + body + "

"; + last_status.children('div.updates').append(html); } this.show_progress = function(ticks, total) { - var percent = Math.round((ticks / total) * 100); - $("#status-console div:last span.progress").remove(); - $("#status-console div:last p:last").append(" " + percent + "%"); + percent = Math.round((ticks / total) * 100); + last_status = this.last_status(); + + last_status.children('div.updates').children('p:last').children('span.progress').remove(); + html = " " + percent + "%"; + last_status.children('div.updates').children('p:last').append(html); } this.show_progress_message = function(body) { - $("#status-console div:last p:first span.progress-message").remove(); - $("#status-console div:last p:first").append(" - " + body + ""); + $("#status-console div.status:last div.updates span.progress-message").remove(); + $("#status-console div.status:last div.updates p.migration:last").append(" - " + body + ""); } + + this.flash = function(body, source){ + source = typeof source !== 'undefined' ? source : 'migration'; + last_status = this.last_status(); + + last_status.children('div.updates').children('p:last').children('span.progress').remove(); + last_status.children('div.updates').children('p.flash').remove(); + + html = "

" + body + "

"; + last_status.children('div.updates').append(html); + } + + } diff --git a/app/lib/archivesspace_client.rb b/app/lib/archivesspace_client.rb index 382ae6e7..d8f01a7a 100644 --- a/app/lib/archivesspace_client.rb +++ b/app/lib/archivesspace_client.rb @@ -36,8 +36,10 @@ def self.initialized? module HTTP - def init_session - $log.debug("Logging into ArchivesSpace") + def init_session(triesleft = 20) + $log.debug("Attempt logging into ArchivesSpace") + Thread.current[:backend_session] = nil + url = URI("#{@url}/users/#{@user}/login") raise URIException, "URI format error: #{@url}" unless URI::HTTP === url @@ -46,7 +48,13 @@ def init_session response = JSONModel::HTTP.do_http_request(url, req) unless response.code == '200' - raise "Couldn't log into ArchivesSpace and start a session" + if triesleft > 0 + $log.debug("Log in failed: try again in 1 second") + sleep(1) + init_session(triesleft - 1) + else + raise "Giving up: couldn't log into ArchivesSpace and start a session" + end end json = JSON::parse(response.body) @@ -54,6 +62,7 @@ def init_session # for JSONModel Thread.current[:backend_session] = @session + $log.debug("New backend session: #{@session}") end @@ -142,12 +151,16 @@ def import(y) # if cache.empty? && seen_records.empty? if cache.empty? && working_file.size == 0 - $log.warn("Empty batch: aborting, not saving") - return {} + $log.warn("Empty batch: not saving") + return {} end # save the batch $log.debug("Posting import batch") + + init_session # log in before posting a batch + $log.debug("Using session: #{Thread.current[:backend_session]}") + cache.save! 
do |response| if response.code.to_s == '200' @@ -163,10 +176,12 @@ def import(y) end end rescue JSON::ParserError => e + $log.debug("JSON parse error parsing chunk #{chunk}") y << json_chunk({ :type => 'error', :body => e.to_s }) + return false end end @@ -175,6 +190,7 @@ def import(y) :type => 'error', :body => "ArchivesSpace server error: #{response.code}" }) + return false end end @@ -195,7 +211,7 @@ def normalize_message(message) end elsif message['saved'] && message['saved'].is_a?(Hash) r = { - :type => 'status', + :type => 'update', :source => 'aspace', :body => "Saved #{message['saved'].keys.count} records" } @@ -204,16 +220,16 @@ def normalize_message(message) elsif message['status'].respond_to?(:length) message['status'].each do |status| if status['type'] == 'started' - r = { - :type => 'status', + r = { :source => 'aspace', :body => status['label'], :id => status['id'] } + r[:type] = r[:body] =~ /^Saved/ ? :update : :flash yield r elsif status['type'] == 'refresh' r = { - :type => 'update', + :type => 'flash', :source => 'migration', :body => status['label'], } @@ -246,9 +262,13 @@ def read(chunk) # do nothing because we're treating the response as a stream elsif chunk =~ /\A\n\]\Z/ # the last message doesn't have a comma, so it's a fragment - yield ASUtils.json_parse(@fragments.sub(/\n\Z/, '')) + s = @fragments.sub(/\n\Z/, '') + @fragments = "" + yield ASUtils.json_parse(s) elsif chunk =~ /.*,\n\Z/ - yield ASUtils.json_parse(@fragments + chunk.sub(/,\n\Z/, '')) + s = @fragments + chunk.sub(/,\n\Z/, '') + @fragments = "" + yield ASUtils.json_parse(s) else @fragments << chunk end diff --git a/app/lib/archon_client.rb b/app/lib/archon_client.rb index 476fdce2..0eab6b60 100644 --- a/app/lib/archon_client.rb +++ b/app/lib/archon_client.rb @@ -259,6 +259,10 @@ def self.to_obj(rec) obj.uri = obj.class.uri_for(rec.import_id) end + if obj.respond_to?(:external_ids) && rec['ID'] + obj.external_ids << {:source => "Archon", :external_id => rec["ID"]} + end + obj 
else raise "error" @@ -267,7 +271,7 @@ def self.to_obj(rec) def self.unspecified(value) - $log.debug("Using unspecified value: #{value}") + # $log.debug("Using unspecified value: #{value}") value end @@ -340,6 +344,7 @@ def pp module HTTP + require 'sqlite3' if Appdata.use_dbcache def http @http ||= Net::HTTP::Persistent.new 'archon_client' @@ -353,18 +358,32 @@ def get_json(endpoint, usecache=true) :max_size => Appdata.archon_page_cache_size, :default => false) + # look at in-memory cache if @http_cache[endpoint] - $log.debug("Using cached page") - json_string = @http_cache[endpoint] - elsif usecache - $log.debug("Caching page") - @http_cache[endpoint] = _get_json(endpoint) json_string = @http_cache[endpoint] + # look at / send to db cache + elsif Appdata.use_dbcache + db = get_db + rows = db.execute( "select * from archon_responses where endpoint like '#{endpoint}'" ) + if rows.count == 1 + $log.debug("Using DB cache for endpoint #{endpoint}") + json_string = rows[0][1] + elsif rows.count == 0 + $log.debug("Adding DB cache for #{endpoint}") + json_string = _get_json(endpoint) + db.execute( "insert into archon_responses (endpoint, json) values (?, ?)", [endpoint, json_string] ) + else + raise "bad database row count" + end else - $log.debug("Fetching page, not caching") json_string = _get_json(endpoint) end + # send to in-memory cache + if usecache + @http_cache[endpoint] = json_string + end + begin json = JSON.parse(json_string) return json @@ -373,7 +392,10 @@ def get_json(endpoint, usecache=true) return nil else $log.debug("Bad Response String: #{json_string}") - raise "Archon response is not JSON!" 
+ msg = "Archon response is not JSON!\n" + msg += "Endpoint: '#{endpoint}'\n" + msg += "See log for illegal JSON string" + raise msg end end end @@ -468,6 +490,18 @@ def _get_json(endpoint) return response.body end end + + # a really dumb database + def get_db + db = SQLite3::Database.new("archon_api_cache.db") + tables = db.execute "select name from sqlite_master where type = 'table'" + unless tables.count == 1 && tables[0][0] == 'archon_responses' + $log.debug("Initializing API cache database") + r = db.execute( "create table archon_responses (endpoint varchar(255), json blob)" ) + end + + db + end end @@ -475,12 +509,16 @@ class Client include HTTP def initialize(opts = {}) + if (opts.keys - [:url, :user, :password]).length > 0 + raise "Bad argument in #{opts.inspect}" + end @url = opts[:url] || Appdata.default_archon_url @user = opts[:user] || Appdata.default_archon_user @password = opts[:password] || Appdata.default_archon_password - init_session + @url.sub!(/\/*$/, '') + init_session Thread.current[:archon_client] = self end diff --git a/app/lib/migrate.rb b/app/lib/migrate.rb index f1322cdf..e0d7fa71 100644 --- a/app/lib/migrate.rb +++ b/app/lib/migrate.rb @@ -125,6 +125,7 @@ def migrate_repository(archon_repo_id, aspace_repo_uri) end # Resource Component Trees + failures = [] Archon.record_type(:collection).each(false) do |data| next unless data['RepositoryID'] == archon_repo_id coll_id = data['ID'] @@ -176,83 +177,97 @@ def migrate_collection_content(repo_id, digital_instance_map, classification_map) - @aspace.repo(repo_id).import(@y) do |batch| - container_trees = {} - position_tracker = {} - position_map = {} - - Archon.record_type(:content).set(coll_id).each do |rec| - import_id = rec.class.import_id_for(rec['ID']) - rec.class.transform(rec) do |obj_or_cont| - if obj_or_cont.is_a?(Array) - cont = obj_or_cont - unless container_trees.has_key?(cont[0]) - container_trees[cont[0]] = [] - end - container_trees[cont[0]] << cont[1] - else - obj = obj_or_cont - 
set_key = obj.parent.nil? ? nil : obj.parent['ref'] - position_tracker[set_key] ||= {} - position_tracker[set_key][obj.position] ||= [] - position_tracker[set_key][obj.position] << obj.key - - resolve_ids_to_links(rec, obj_or_cont, classification_map) - - # link resource - resource_uri = resource_map[import_id_for(:collection, coll_id)] - obj.resource = {:ref => resource_uri} - # attach digital object instances - if digital_instance_map && digital_instance_map[import_id] - digital_instance_map[import_id].each do |do_uri| - instance = ASpaceImport.JSONModel(:instance).new - instance.instance_type = 'digital_object' - instance.digital_object = { - :ref => do_uri - } - obj.instances << instance + emit_status("Migrating Collection Content #{coll_id}", :update) + + i = 1 + 5.times do + emit_status("Attempt #{i}", :flash) + i += 1 + result = @aspace.repo(repo_id).import(@y) do |batch| + container_trees = {} + position_tracker = {} + position_map = {} + + Archon.record_type(:content).set(coll_id).each do |rec| + import_id = rec.class.import_id_for(rec['ID']) + rec.class.transform(rec) do |obj_or_cont| + if obj_or_cont.is_a?(Array) + cont = obj_or_cont + unless container_trees.has_key?(cont[0]) + container_trees[cont[0]] = [] + end + container_trees[cont[0]] << cont[1] + else + obj = obj_or_cont + set_key = obj.parent.nil? ? 
nil : obj.parent['ref'] + position_tracker[set_key] ||= {} + position_tracker[set_key][obj.position] ||= [] + position_tracker[set_key][obj.position] << obj.key + + resolve_ids_to_links(rec, obj_or_cont, classification_map) + + # link resource + resource_uri = resource_map[import_id_for(:collection, coll_id)] + obj.resource = {:ref => resource_uri} + # attach digital object instances + emit_status("Attaching digital object instances to Content record #{obj.title}", :flash) + if digital_instance_map && digital_instance_map[import_id] + digital_instance_map[import_id].each do |do_uri| + instance = ASpaceImport.JSONModel(:instance).new + instance.instance_type = 'digital_object' + instance.digital_object = { + :ref => do_uri + } + obj.instances << instance + end end - end - batch.unshift(obj_or_cont) + batch.unshift(obj_or_cont) + end end end - end - # it might not be a bad idea to move this - # to aspace one day - position_tracker.each do |id, map| - sorted = map.keys.sort - sorted.each_with_index do |padded_position, real_position| - map[padded_position].each do |obj_key| - position_map[obj_key] = real_position + # it might not be a bad idea to move this + # to aspace one day + emit_status("Adjusting positions for Content records in Collection #{coll_id}", :flash) + position_tracker.each do |id, map| + sorted = map.keys.sort + sorted.each_with_index do |padded_position, real_position| + map[padded_position].each do |obj_key| + position_map[obj_key] = real_position + end end end - end + emit_status("Done adjusting positions", :flash) - batch.each do |obj| - if position_map.has_key?(obj.key) - obj.position = position_map[obj.key] - else - obj.position = nil - end + emit_status("Matching Content records to Containers", :flash) + batch.each do |obj| + if position_map.has_key?(obj.key) + obj.position = position_map[obj.key] + else + obj.position = nil + end - if (container_data_sets = container_trees[obj.key]) - container_data_sets.each do |container_data| - container = 
ASpaceImport.JSONModel(:container).new - container_data.each_with_index do |data, i| - container.send("type_#{i+1}=", (data[:type] || "unknown")) - container.send("indicator_#{i+1}=", data[:indicator]) - end - - instance = ASpaceImport.JSONModel(:instance).new - instance.container = container - instance.instance_type = 'text' + if (container_data_sets = container_trees[obj.key]) + container_data_sets.each do |container_data| + container = ASpaceImport.JSONModel(:container).new + container_data.each_with_index do |data, i| + container.send("type_#{i+1}=", (data[:type] || "unknown")) + container.send("indicator_#{i+1}=", data[:indicator]) + end + + instance = ASpaceImport.JSONModel(:instance).new + instance.container = container + instance.instance_type = 'text' - obj.instances << instance + obj.instances << instance + end end end + emit_status("Done matching Content records to Containers", :flash) end + + break unless result == false end end @@ -272,9 +287,10 @@ def migrate_repo_records def migrate_users emit_status("Migrating User records") + i = 0; Archon.record_type(:user).each do |rec| - + i = i + 1; rec.class.transform(rec) do |obj| my_groups = [] @@ -301,6 +317,9 @@ def migrate_users $log.debug("Save User result: #{result}") end + if i.modulo(10) == 0 + emit_status("Saved #{i} records", :flash); + end end end @@ -313,11 +332,16 @@ def migrate_creators_and_subjects :creator ].each do |key| + i = 0; Archon.record_type(key).each do |rec| $log.debug("Migrating Record: #{rec.inspect}") rec.class.transform(rec) do |obj| batch << obj end + i += 1 + if i.modulo(100) == 0 + emit_status("#{i} Archon records have been read", :flash) + end end end end @@ -401,6 +425,7 @@ def migrate_digital_objects(repo_id, coll_id, classification_map) end end end + $log.debug(do_map.inspect); Hash[instance_map.map{|k, v| [k, v.map{|import_id| do_map[import_id]}]}] end diff --git a/app/lib/migration_helpers.rb b/app/lib/migration_helpers.rb index 10b177bf..62e708c4 100644 --- 
a/app/lib/migration_helpers.rb +++ b/app/lib/migration_helpers.rb @@ -21,15 +21,6 @@ def map_group_id(old_group_id) end - # def bounded_containers(container_data) - # if container_data.length > 2 - # container_data[-3..-1] - # else - # container_data - # end - # end - - def get_classification_import_id(rec) id = if rec.has_key?('Classifications') rec['Classifications'][0] diff --git a/app/main.rb b/app/main.rb index b889ae9f..525abf92 100644 --- a/app/main.rb +++ b/app/main.rb @@ -1,3 +1,9 @@ + +unless RUBY_VERSION =~ /1\.9\.3/ + puts "You are using an unsupported version of Ruby. Please read the README and try again" + raise "Cannot support #{RUBY_VERSION}" +end + require 'sinatra' require 'sinatra/assetpack' @@ -81,4 +87,3 @@ end end end - diff --git a/app/models/archon_accession.rb b/app/models/archon_accession.rb index 16dbfd49..f2c19d0e 100644 --- a/app/models/archon_accession.rb +++ b/app/models/archon_accession.rb @@ -23,7 +23,7 @@ def self.transform(rec) }) end - if rec['ReceivedExtent'] + if rec['ReceivedExtent'] && rec['ReceivedExtent'].is_a?(String) obj.extents << model(:extent, { :number => rec['ReceivedExtent'], diff --git a/app/models/archon_content.rb b/app/models/archon_content.rb index ac7118cd..d59d4af8 100644 --- a/app/models/archon_content.rb +++ b/app/models/archon_content.rb @@ -1,6 +1,7 @@ Archon.record_type(:content) do plural 'content' no_html 'Title' + corresponding_record_type :archival_object def self.endpoint(start = 1) raise "Collection not specified" unless @cid @@ -38,8 +39,7 @@ def self.transform(rec) def self.to_archival_object(rec) - obj = model(:archival_object).new - obj.uri = obj.class.uri_for(rec.import_id) + obj = to_obj(rec) obj.key = rec['ID'] obj.level = rec['EADLevel'] @@ -207,6 +207,7 @@ def self.nearest_non_physical_ancestor(parent_id) def self.figure_out_position(rec, position=nil, xtra = []) position = rec['SortOrder'] unless position + position = pad(position, 3) parent_id = rec['ParentID'] return position.to_i if 
parent_id == '0' || parent_id.nil? @@ -223,10 +224,11 @@ def self.figure_out_position(rec, position=nil, xtra = []) position = "#{position}#{pad(xtra.shift, 4)}" end - # normalize all integers to 13 decimals + # normalize all integers to 16 decimal space # and assume no not-merely-physical node # will have 3 physical-only ancestors in a row - position = pad(position, 13, :right) + # (3 x 4) + 4 = 16 + position = pad(position, 16, :right) return position.to_i end diff --git a/app/models/archon_creator.rb b/app/models/archon_creator.rb index 30a37c44..1927ffbe 100644 --- a/app/models/archon_creator.rb +++ b/app/models/archon_creator.rb @@ -67,6 +67,10 @@ def self.transform(rec) obj.uri = obj.class.uri_for(rec.import_id) + if obj.respond_to?(:external_ids) && rec['ID'] + obj.external_ids << {:source => "Archon", :external_id => rec["ID"]} + end + if rec['BiogHist'] note = model(:note_bioghist).new diff --git a/app/models/archon_mixins.rb b/app/models/archon_mixins.rb index 7a31e27f..a5a9cdb4 100644 --- a/app/models/archon_mixins.rb +++ b/app/models/archon_mixins.rb @@ -65,6 +65,10 @@ def transform_location(loc) obj = model(:location).new obj.building = loc['Location'] + if obj.respond_to?(:external_ids) && rec['ID'] + obj.external_ids << {:source => "Archon", :external_id => rec["ID"]} + end + loc_keys = %w(RangeValue Section Shelf) i = 1 loc_keys.each do |k| diff --git a/app/models/archon_subject.rb b/app/models/archon_subject.rb index ce532ddb..82e02552 100644 --- a/app/models/archon_subject.rb +++ b/app/models/archon_subject.rb @@ -16,6 +16,11 @@ def self.transform(rec) end obj.uri = obj.class.uri_for(rec.import_id) + + if obj.respond_to?(:external_ids) && rec['ID'] + obj.external_ids << {:source => "Archon", :external_id => rec["ID"]} + end + yield obj end diff --git a/config/config.rb b/config/config.rb index 94059ab2..199f03d6 100644 --- a/config/config.rb +++ b/config/config.rb @@ -30,7 +30,8 @@ def config(&block) :default_aspace_user, :default_aspace_password, 
:mode, - :app_dir + :app_dir, + :use_dbcache Appdata.aspace_version 'v1.0.0' @@ -55,6 +56,11 @@ def config(&block) # has sufficient memory Appdata.archon_page_cache_size 400 +Appdata.use_dbcache false + + + + if File.exists?(File.join(File.dirname(__FILE__), 'config_local.rb')) require_relative('config_local.rb') end diff --git a/spec/archivesspace_client_spec.rb b/spec/archivesspace_client_spec.rb new file mode 100644 index 00000000..586ea8c7 --- /dev/null +++ b/spec/archivesspace_client_spec.rb @@ -0,0 +1,34 @@ +require_relative 'spec_helper' + +describe "ResponseReader" do + + before(:all) do + @rr = ArchivesSpace::ResponseReader.new + end + + it "can managed fragmentary messages coming from ASpace" do + + # ripped straight from the headlines + message_stream = [ + "{\"status\":[{\"id\":1,\"label\":\"Reading JSON records\",\"type\":\"started\"},{\"id\":1,\"label\":\"Reading JSON records\",\"type\":\"done\"},{\"id\":2,\"label\":\"Validating records and checking links\",\"type\":\"started\"},{\"id\":2,\"label\":\"Validating records and checking links\",\"type\":\"done\"},{\"id\":3,\"label\":\"Evaluating record relationships\",\"type\":\"started\"},{\"id\":3,\"label\":\"Evaluating record relationships\",\"type\":\"done\"},{\"id\":4,\"label\":\"Saving records: cycle 1\",\"type\":\"started\"},{\"id\":4,\"label\":\"Saving records: cycle 1\",\"type\":\"done\"},{\"id\":5,\"label\":\"Dealing with circular dependencies: cycle 1\",\"type\":\"started\"},{\"id\":5,\"label\":\"Dealing with circular dependencies: cycle 1\",\"type\":\"done\"},{\"id\":6,\"label\":\"Saving records: cycle 2\",\"type\":\"started\"},{\"id\":6,\"label\":\"Saving records: cycle 2\",\"type\":\"done\"},{\"id\":7,\"label\":\"Dealing with circular dependencies: cycle 2\",\"type\":\"started\"},{\"id\":7,\"label\":\"Dealing with circular dependencies: cycle 2\",\"type\":\"done\"},{\"id\":8,\"label\":\"Saving records: cycle 3\",\"type\":\"started\"},{\"id\":8,\"label\":\"Saving records: cycle 
3\",\"type\":\"done\"},{\"id\":9,\"label\":\"Dealing with circular dependencies: cycle 3\",\"type\":\"started\"},{\"id\":9,\"label\":\"Dealing with circular dependencies: cycle 3\",\"type\":\"done\"},{\"id\":10,\"label\":\"Saving records: cycle 4\",\"type\":\"started\"},{\"id\":10,\"label\":\"Saving records: cycle 4\",\"type\":\"done\"},{\"id\":11,\"label\":\"Dealing with circular dependencies: cycle 4\",\"type\":\"started\"},{\"id\":11,\"label\":\"Dealing with circular dependencies: cycle 4\",\"type\":\"done\"},{\"id\":12,\"label\":\"Saving records: cycle 5\",\"type\":\"started\"},{\"id\":12,\"label\":\"Saving records: cycle 5\",\"type\":\"done\"},{\"id\":13,\"label\":\"Dealing with circular dependencies: cycle 5\",\"type\":\"started\"},{\"id\":13,\"label\":\"Dealing with circular dependencies: cycle 5\",\"type\":\"done\"},{\"id\":14,\"label\":\"Saving records: cycle 6\",\"type\":\"started\"},{\"id\":14,\"label\":\"Saving records: cycle 6\",\"type\":\"done\"},{\"id\":15,\"label\":\"Dealing with circular dependencies: cycle 6\",\"type\":\"started\"},{\"id\":15,\"label\":\"Dealing with circular dependencies: cycle 6\",\"type\":\"done\"},{\"id\":16,\"label\":\"Saving records: cycle 7\",\"type\":\"started\"},{\"id\":16,\"label\":\"Saving records: cycle 7\",\"type\":\"done\"},{\"id\":17,\"label\":\"Dealing with circular dependencies: cycle 7\",\"type\":\"started\"},{\"id\":17,\"label\":\"Dealing with circular dependencies: cycle 7\",\"type\":\"done\"},{\"id\":18,\"label\":\"Saving records: cycle 8\",\"type\":\"started\"},{\"id\":18,\"label\":\"Saving records: cycle 8\",\"type\":\"done\"},{\"id\":19,\"label\":\"Dealing with circular dependencies: cycle 8\",\"type\":\"started\"},{\"id\":19,\"label\":\"Dealing with circular dependencies: cycle 8\",\"type\":\"done\"},{\"id\":20,\"label\":\"Saving records: cycle 9\",\"type\":\"started\"},{\"id\":20,\"label\":\"Saving records: cycle 9\",\"type\":\"done\"},{\"id\":21,\"label\":\"Dealing with circular dependencies: cycle 
9\",\"type\":\"started\"},{\"id\":21,\"label\":\"Dealing with circular dependencies: cycle 9\",\"type\":\"done\"},{\"id\":22,\"label\":\"Saving records: cycle 10\",\"type\":\"started\"},{\"id\":22,\"label\":\"Saving records: cycle 10\",\"type\":\"done\"},{\"id\":23,\"label\":\"Dealing with circular dependencies: cycle 10\",\"type\":\"started\"},{\"id\":23,\"label\":\"Dealing with circular dependencies: cycle 10\",\"type\":\"done\"},{\"id\":24,\"label\":\"Saving records: cycle 11\",\"type\":\"started\"},{\"id\":24,\"label\":\"Saving records: cycle 11\",\"type\":\"done\"},{\"id\":25,\"label\":\"Dealing with circular dependencies: cycle 11\",\"type\":\"started\"},{\"id\":25,\"label\":\"Dealing with circular dependencies: cycle 11\",\"type\":\"done\"},{\"id\":26,\"label\":\"Saving records: cycle 12\",\"type\":\"started\"},{\"id\":26,\"label\":\"Saving records: cycle 12\",\"type\":\"done\"},{\"id\":27,\"label\":\"Dealing with circular dependencies: cycle 12\",\"type\":\"started\"},{\"id\":27,\"label\":\"Dealing with circular dependencies: cycle 12\",\"type\":\"done\"},{\"id\":28,\"label\":\"Saving records: cycle 13\",\"type\":\"started\"},{\"id\":28,\"label\":\"Saving records: cycle 13\",\"type\":\"done\"},{\"id\":29,\"label\":\"Dealing with circular dependencies: cycle 13\",\"type\":\"started\"},{\"id\":29,\"label\":\"Dealing with circular dependencies: cycle 13\",\"type\":\"done\"},{\"id\":30,\"label\":\"Saving records: cycle 14\",\"type\":\"started\"},{\"id\":30,\"label\":\"Saving records: cycle 14\",\"type\":\"done\"},{\"id\":31,\"label\":\"Dealing with circular dependencies: cycle 14\",\"type\":\"started\"},{\"id\":31,\"label\":\"Dealing with circular dependencies: cycle 14\",\"type\":\"done\"},{\"id\":32,\"label\":\"Saving records: cycle 15\",", + "\"type\":\"started\"},{\"id\":32,\"label\":\"Saving records: cycle 15\",\"type\":\"done\"},{\"id\":33,\"label\":\"Dealing with circular dependencies: cycle 15\",\"type\":\"started\"},{\"id\":33,\"label\":\"Dealing with 
circular dependencies: cycle 15\",\"type\":\"done\"},{\"id\":34,\"label\":\"Saving records: cycle 16\",\"type\":\"started\"},{\"id\":34,\"label\":\"Saving records: cycle 16\",\"type\":\"done\"},{\"id\":35,\"label\":\"Dealing with circular dependencies: cycle 16\",\"type\":\"started\"},{\"id\":35,\"label\":\"Dealing with circular dependencies: cycle 16\",\"type\":\"done\"},{\"id\":36,\"label\":\"Saving records: cycle 17\",\"type\":\"started\"},{\"id\":36,\"label\":\"Saving records: cycle 17\",\"type\":\"done\"},{\"id\":37,\"label\":\"Dealing with circular dependencies: cycle 17\",\"type\":\"started\"},{\"id\":37,\"label\":\"Dealing with circular dependencies: cycle 17\",\"type\":\"done\"},{\"id\":38,\"label\":\"Saving records: cycle 18\",\"type\":\"started\"},{\"id\":38,\"label\":\"Saving records: cycle 18\",\"type\":\"done\"},{\"id\":39,\"label\":\"Dealing with circular dependencies: cycle 18\",\"type\":\"started\"},{\"id\":39,\"label\":\"Dealing with circular dependencies: cycle 18\",\"type\":\"done\"},{\"id\":40,\"label\":\"Saving records: cycle 19\",\"type\":\"started\"},{\"id\":40,\"label\":\"Saving records: cycle 19\",\"type\":\"done\"},{\"id\":41,\"label\":\"Dealing with circular dependencies: cycle 19\",\"type\":\"started\"},{\"id\":41,\"label\":\"Dealing with circular dependencies: cycle 19\",\"type\":\"done\"},{\"id\":42,\"label\":\"Saving records: cycle 20\",\"type\":\"started\"},{\"id\":42,\"label\":\"Saving records: cycle 20\",\"type\":\"done\"},{\"id\":43,\"label\":\"Dealing with circular dependencies: cycle 20\",\"type\":\"started\"},{\"id\":43,\"label\":\"Dealing with circular dependencies: cycle 20\",\"type\":\"done\"},{\"id\":44,\"label\":\"Saving records: cycle 21\",\"type\":\"started\"},{\"id\":44,\"label\":\"Saving records: cycle 21\",\"type\":\"done\"},{\"id\":45,\"label\":\"Dealing with circular dependencies: cycle 21\",\"type\":\"started\"},{\"id\":45,\"label\":\"Dealing with circular dependencies: cycle 
21\",\"type\":\"done\"},{\"id\":46,\"label\":\"Saving records: cycle 22\",\"type\":\"started\"},{\"id\":46,\"label\":\"Saving records: cycle 22\",\"type\":\"done\"},{\"id\":47,\"label\":\"Dealing with circular dependencies: cycle 22\",\"type\":\"started\"},{\"id\":47,\"label\":\"Dealing with circular dependencies: cycle 22\",\"type\":\"done\"},{\"id\":48,\"label\":\"Saving records: cycle 23\",\"type\":\"started\"},{\"id\":48,\"label\":\"Saving records: cycle 23\",\"type\":\"done\"},{\"id\":49,\"label\":\"Dealing with circular dependencies: cycle 23\",\"type\":\"started\"},{\"id\":49,\"label\":\"Dealing with circular dependencies: cycle 23\",\"type\":\"done\"},{\"id\":50,\"label\":\"Saving records: cycle 24\",\"type\":\"started\"},{\"id\":50,\"label\":\"Saving records: cycle 24\",\"type\":\"done\"},{\"id\":51,\"label\":\"Dealing with circular dependencies: cycle 24\",\"type\":\"started\"},{\"id\":51,\"label\":\"Dealing with circular dependencies: cycle 24\",\"type\":\"done\"},{\"id\":52,\"label\":\"Saving records: cycle 25\",\"type\":\"started\"},{\"id\":52,\"label\":\"Saving records: cycle 25\",\"type\":\"done\"},{\"id\":53,\"label\":\"Cleaning up\",\"type\":\"started\"},{\"id\":53,\"label\":\"Cleaning up\",\"type\":\"done\"}]},\n", + 
"{\"saved\":{\"/repositories/2/digital_objects/import_-4002584007866701510-4262\":[\"/repositories/2/digital_objects/596\",596],\"/repositories/2/digital_objects/import_-4002584007866701510-4261\":[\"/repositories/2/digital_objects/597\",597],\"/repositories/2/digital_object_components/import_-1670716676874793766-4266\":[\"/repositories/2/digital_object_components/567\",567],\"/repositories/2/digital_object_components/import_-1670716676874793766-4267\":[\"/repositories/2/digital_object_components/565\",565],\"/repositories/2/digital_object_components/import_-1670716676874793766-4268\":[\"/repositories/2/digital_object_components/563\",563],\"/repositories/2/digital_object_components/import_-1670716676874793766-4269\":[\"/repositories/2/digital_object_components/561\",561],\"/repositories/2/digital_object_components/import_-1670716676874793766-4282\":[\"/repositories/2/digital_object_components/589\",589],\"/repositories/2/digital_object_components/import_-1670716676874793766-4283\":[\"/repositories/2/digital_object_components/588\",588],\"/repositories/2/digital_object_components/import_-1670716676874793766-4284\":[\"/repositories/2/digital_object_components/587\",587],\"/repositories/2/digital_object_components/import_-1670716676874793766-4285\":[\"/repositories/2/digital_object_components/586\",586],\"/repositories/2/digital_object_components/import_-1670716676874793766-4286\":[\"/repositories/2/digital_object_components/585\",585],\"/repositories/2/digital_object_components/import_-1670716676874793766-4287\":[\"/repositories/2/digital_object_components/584\",584],\"/repositories/2/digital_object_components/import_-1670716676874793766-4288\":[\"/repositories/2/digital_object_components/583\",583],\"/repositories/2/digital_object_components/import_-1670716676874793766-4289\":[\"/repositories/2/digital_object_components/582\",582],\"/repositories/2/digital_object_components/import_-1670716676874793766-4290\":[\"/repositories/2/digital_object_components/581\",581],\"
/repositories/2/digital_object_components/import_-1670716676874793766-4291\":[\"/repositories/2/digital_object_components/580\",580],\"/repositories/2/digital_object_components/import_-1670716676874793766-4292\":[\"/repositories/2/digital_object_components/579\",579],\"/repositories/2/digital_object_components/import_-1670716676874793766-4293\":[\"/repositories/2/digital_object_components/578\",578],\"/repositories/2/digital_object_components/import_-1670716676874793766-4294\":[\"/repositories/2/digital_object_components/577\",577],\"/repositories/2/digital_object_components/import_-1670716676874793766-4295\":[\"/repositories/2/digital_object_components/576\",576],\"/repositories/2/digital_object_components/import_-1670716676874793766-4296\":[\"/repositories/2/digital_object_components/575\",575],\"/repositories/2/digital_object_components/import_-1670716676874793766-4297\":[\"/repositories/2/digital_object_components/574\",574],\"/repositories/2/digital_object_components/import_-1670716676874793766-4298\":[\"/repositories/2/digital_object_components/573\",573],\"/repositories/2/digital_object_components/import_-1670716676874793766-4299\":[\"/repositories/2/digital_object_components/572\",572],\"/repositories/2/digital_object_components/import_-1670716676874793766-4300\":[\"/repositories/2/digital_object_components/571\",571],\"/repositories/2/digital_object_components/import_-1670716676874793766-4301\":[\"/repositories/2/digital_object_components/570\",570],\"/repositories/2/digital_object_components/import_-1670716676874793766-4302\":[\"/repositories/2/digital_object_components/569\",569],\"/repositories/2/digital_object_components/import_-1670716676874793766-4303\":[\"/repositories/2/digital_object_components/568\",568],\"/repositories/2/digital_object_components/import_-1670716676874793766-4304\":[\"/repositories/2/digital_object_components/566\",566],\"/repositories/2/digital_object_components/import_-1670716676874793766-4305\":[\"/repositories/2/digital_object
_components/564\",564],\"/repositories/2/digital_object_components/import_-1670716676874793766-4306\":[\"/repositories/2/digital_object_components/562\",562]}}\n", + "\n]" + ] + + message_stream.each do |chunk| + @rr.read(chunk) do |data| + # all the data should be a hash, naturally + data.should be_a(Hash) + end + end + + message_stream.each do |chunk| + @rr.read(chunk) do |data| + # if we yielded something, the @fragements cache should be empty + @rr.instance_variable_get(:@fragments).should be_empty + end + end + end +end +