From cce29cccb7c1fc62a8ed91942f5808ae8ab4873e Mon Sep 17 00:00:00 2001 From: Nathan Stevens Date: Fri, 6 Mar 2015 13:19:18 -0500 Subject: [PATCH] Fix for subject source not migrating correctly with faceted subject terms --- app/models/archon_digitalfile.rb | 22 +++++++++++++++++++++- app/models/archon_subject.rb | 11 +++++++++-- app/views/index.erb | 2 +- config/config.rb | 2 +- 4 files changed, 32 insertions(+), 5 deletions(-) diff --git a/app/models/archon_digitalfile.rb b/app/models/archon_digitalfile.rb index 993295aa..cf1c936d 100644 --- a/app/models/archon_digitalfile.rb +++ b/app/models/archon_digitalfile.rb @@ -26,7 +26,27 @@ def self.unique_filename(basename, id) else "#{basename}.#{i}" end - newname + + # need to remove special characters from filename + sanitize_filename(newname) + end + + + # http://stackoverflow.com/questions/1939333/how-to-make-a-ruby-string-safe-for-a-filesystem + def self.sanitize_filename(filename) + # Split the name when finding a period which is preceded by some + # character, and is followed by some character other than a period, + # if there is no following period that is followed by something + # other than a period (yeah, confusing, I know) + fn = filename.split /(?<=.)\.(?=[^.])(?!.*\.[^.])/m + + # We now have one or two parts (depending on whether we could find + # a suitable period). For each of these parts, replace any unwanted + # sequence of characters with an underscore + fn.map! { |s| s.gsub /[^a-z0-9\-]+/i, '_' } + + # Finally, join the parts with a period and return the result + return fn.join '.' 
end diff --git a/app/models/archon_subject.rb b/app/models/archon_subject.rb index 82e02552..e046435b 100644 --- a/app/models/archon_subject.rb +++ b/app/models/archon_subject.rb @@ -12,7 +12,7 @@ def self.transform(rec) obj.terms = terms obj.external_ids = [{:external_id => rec["ID"], :source => "Archon"}] obj.vocabulary = '/vocabularies/1' - obj.source = get_source(rec["SubjectSourceID"]) + obj.source = get_source(get_source_id(rec)) end obj.uri = obj.class.uri_for(rec.import_id) @@ -24,6 +24,13 @@ def self.transform(rec) yield obj end + def self.get_source_id(rec) + # Climb to the top-level term (ParentID '0'), which carries the source ID; yields nil if the parent chain ends first + until rec.nil? || (rec['SubjectSourceID'] && rec['ParentID'] == '0') + rec = rec['Parent'] + end + rec && rec['SubjectSourceID'] + end def self.get_source(id) rec = Archon.record_type(:subjectsource).find(id) @@ -85,7 +92,7 @@ def self.transform_to_agent(rec) def self.name_template(rec) hsh = super - hsh.merge({:source => get_source(rec['SubjectSourceID'])}) + hsh.merge({:source => get_source(get_source_id(rec))}) end end diff --git a/app/views/index.erb b/app/views/index.erb index c230dab2..ce04282b 100644 --- a/app/views/index.erb +++ b/app/views/index.erb @@ -1,4 +1,4 @@ -

Archon Migration Service 1.0.2 (10-23-2014)

+

Archon Migration Service 1.0.3B1 (03-06-2015)

<%= ERB.new(File.read(File.dirname(__FILE__)+'/jobs/new.erb')).result(binding) %> diff --git a/config/config.rb b/config/config.rb index 2c0afabd..8bbc98b1 100644 --- a/config/config.rb +++ b/config/config.rb @@ -54,7 +54,7 @@ def config(&block) # Collection, divided by 100. So, e.g., if you have a Collection # with 50,000 Content records, set this to 500 if your server # has sufficient memory -Appdata.archon_page_cache_size 400 +Appdata.archon_page_cache_size 600 Appdata.use_dbcache false