From cce29cccb7c1fc62a8ed91942f5808ae8ab4873e Mon Sep 17 00:00:00 2001
From: Nathan Stevens <ns96@nyu.edu>
Date: Fri, 6 Mar 2015 13:19:18 -0500
Subject: [PATCH] Fix for subject source not migrating correctly with faceted
 subject terms

---
 app/models/archon_digitalfile.rb | 22 +++++++++++++++++++++-
 app/models/archon_subject.rb     | 11 +++++++++--
 app/views/index.erb              |  2 +-
 config/config.rb                 |  2 +-
 4 files changed, 32 insertions(+), 5 deletions(-)
diff --git a/app/models/archon_digitalfile.rb b/app/models/archon_digitalfile.rb
index 993295aa..cf1c936d 100644
--- a/app/models/archon_digitalfile.rb
+++ b/app/models/archon_digitalfile.rb
@@ -26,7 +26,27 @@ def self.unique_filename(basename, id)
               else
                 "#{basename}.#{i}"
               end
-    newname
+
+    # need to remove special characters from filename
+    sanitize_filename(newname)
+  end
+
+  # Replaces runs of characters other than a-z, 0-9 and '-' (any case) with '_'.
+  # http://stackoverflow.com/questions/1939333/how-to-make-a-ruby-string-safe-for-a-filesystem
+  def self.sanitize_filename(filename)
+    # Split on the last period that has a character before it and a
+    # non-period character after it, i.e. the separator in front of the
+    # final extension. Without such a period the name is left unsplit.
+    # The /m flag lets '.' in the pattern match newlines as well.
+    fn = filename.split /(?<=.)\.(?=[^.])(?!.*\.[^.])/m
+
+    # fn now holds one or two parts (none for an empty name). In each
+    # part, collapse every run of characters outside a-z, 0-9 and '-'
+    # (case-insensitive) into a single underscore
+    fn.map! { |s| s.gsub /[^a-z0-9\-]+/i, '_' }
+
+    # Finally, join the parts with a period and return the result
+    return fn.join '.'
   end
 
 
diff --git a/app/models/archon_subject.rb b/app/models/archon_subject.rb
index 82e02552..e046435b 100644
--- a/app/models/archon_subject.rb
+++ b/app/models/archon_subject.rb
@@ -12,7 +12,7 @@ def self.transform(rec)
       obj.terms = terms
       obj.external_ids = [{:external_id => rec["ID"], :source => "Archon"}]
       obj.vocabulary = '/vocabularies/1'
-      obj.source = get_source(rec["SubjectSourceID"])
+      obj.source = get_source(get_source_id(rec))
     end
 
     obj.uri = obj.class.uri_for(rec.import_id)
@@ -24,6 +24,13 @@ def self.transform(rec)
     yield obj
   end
 
+  # SubjectSourceID of the nearest record in the Parent chain whose ParentID is '0'; nil if none.
+  def self.get_source_id(rec)
+    while rec # loop instead of recursing so a missing Parent ends the walk with nil
+      return rec['SubjectSourceID'] if rec['SubjectSourceID'] && rec['ParentID'] == '0'
+      rec = rec['Parent']
+    end
+  end
 
   def self.get_source(id)
     rec = Archon.record_type(:subjectsource).find(id)
@@ -85,7 +92,7 @@ def self.transform_to_agent(rec)
 
   def self.name_template(rec)
     hsh = super
-    hsh.merge({:source => get_source(rec['SubjectSourceID'])})
+    hsh.merge({:source => get_source(get_source_id(rec))}) # source ID is resolved via get_source_id, not read straight off rec
   end
     
 end
diff --git a/app/views/index.erb b/app/views/index.erb
index c230dab2..ce04282b 100644
--- a/app/views/index.erb
+++ b/app/views/index.erb
@@ -1,4 +1,4 @@
-<h1>Archon Migration Service 1.0.2 (10-23-2014)</h1>
+<h1>Archon Migration Service 1.0.3B1 (03-06-2015)</h1>
 <div class="pure-g">
 <div id="form-wrapper" class="pure-u-1-2">
 <%= ERB.new(File.read(File.dirname(__FILE__)+'/jobs/new.erb')).result(binding) %>
diff --git a/config/config.rb b/config/config.rb
index 2c0afabd..8bbc98b1 100644
--- a/config/config.rb
+++ b/config/config.rb
@@ -54,7 +54,7 @@ def config(&block)
 # Collection, divided by 100. So, e.g., if you have a Collection
 # with 50,000 Content records, set this to 500 if your server
 # has sufficient memory
-Appdata.archon_page_cache_size          400 
+Appdata.archon_page_cache_size          600
 
 Appdata.use_dbcache											false