From 5d9dd9e4accd1411d4f08e1f492e68a0fc53c7ae Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Wed, 14 Oct 2020 20:22:01 -0600 Subject: [PATCH 1/9] HOCR import and publication of annotationlist --- Gemfile | 4 + lib/tasks/annotations.rake | 16 ++ lib/tasks/import.rake | 20 +++ lib/tasks/import/hocr.rb | 142 ++++++++++++++++++ lib/wax_tasks.rb | 2 + lib/wax_tasks/annotation.rb | 32 ++++ lib/wax_tasks/annotationlist.rb | 58 +++++++ lib/wax_tasks/collection.rb | 6 +- lib/wax_tasks/collection/annotations.rb | 137 +++++++++++++++++ lib/wax_tasks/item.rb | 9 +- lib/wax_tasks/site.rb | 13 ++ spec/sample_hocr/img_item_1.hocr | 23 +++ spec/sample_hocr/manifest.json | 49 ++++++ ...t_collection_img_item_1_ocr_paragraph.yaml | 7 + spec/sample_site/_config.yml | 2 + spec/wax_tasks/annotations_spec.rb | 28 ++++ spec/wax_tasks/hocr_spec.rb | 53 +++++++ spec/wax_tasks/site_spec.rb | 42 ++++++ 18 files changed, 641 insertions(+), 2 deletions(-) create mode 100644 lib/tasks/annotations.rake create mode 100644 lib/tasks/import.rake create mode 100644 lib/tasks/import/hocr.rb create mode 100644 lib/wax_tasks/annotation.rb create mode 100644 lib/wax_tasks/annotationlist.rb create mode 100644 lib/wax_tasks/collection/annotations.rb create mode 100644 spec/sample_hocr/img_item_1.hocr create mode 100644 spec/sample_hocr/manifest.json create mode 100644 spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml create mode 100644 spec/wax_tasks/annotations_spec.rb create mode 100644 spec/wax_tasks/hocr_spec.rb diff --git a/Gemfile b/Gemfile index bf636a6..da5890c 100644 --- a/Gemfile +++ b/Gemfile @@ -3,9 +3,13 @@ source 'https://rubygems.org' gemspec +gem 'addressable' + # dev/test utilities gem 'bundle-audit', require: false +gem 'byebug', require: false gem 'diane', require: false +gem 'nokogiri', require: false gem 'rubocop', require: false gem 'simplecov', '0.17.1', require: false gem 'yard', require: false diff --git a/lib/tasks/annotations.rake b/lib/tasks/annotations.rake new 
file mode 100644 index 0000000..fc223d3 --- /dev/null +++ b/lib/tasks/annotations.rake @@ -0,0 +1,16 @@ +# frozen_string_literal: true + +require 'wax_tasks' + +namespace :wax do + desc 'generate annotationlists from local yaml files' + task :annotations do + args = ARGV.drop(1).each { |a| task a.to_sym } + args.reject! { |a| a.start_with? '-' } + + raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:annotations'").magenta if args.empty? + + site = WaxTasks::Site.new + args.each { |a| site.generate_annotations(a) } + end +end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 0000000..dca297d --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +require_relative './import/hocr.rb' + +require 'byebug' + +namespace :wax do + namespace :import do + task :hocr, [:hocr_path, :collection, :canvas, :granularity] do |_t, args| + desc 'generate canvas-level annotationlist yaml file from hocr file' + + # TODO: validate args + + hocr_annotations = WaxTasks::HocrOpenAnnotationCreator.new(args) + hocr_annotations.save + + puts 'done' + end + end +end diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb new file mode 100644 index 0000000..897fcbf --- /dev/null +++ b/lib/tasks/import/hocr.rb @@ -0,0 +1,142 @@ +require 'nokogiri' +require 'addressable/template' +require 'json' +require 'yaml' +require 'byebug' + +# adapted from Okracoke: +# https://github.com/NCSU-Libraries/ocracoke/blob/master/app/processing_helpers/hocr_open_annotation_creator.rb + +class HocrOpenAnnotationCreator + + def initialize(args) + @canvas_uri = "{{ '/' | absolute_url }}\ +img/derivatives/iiif/canvas/\ +#{args[:collection]}_#{args[:canvas]}.json" + + @hocr = File.open(args[:hocr_path]){ |f| Nokogiri::XML(f) } + @collection = args[:collection] + @identifier = args[:canvas] + @granularity = args[:granularity] + + @selector = get_selector + end + + def manifest_canvas_on_xywh(id, 
xywh) + @canvas_uri + "#xywh=#{xywh}" + end + + def get_selector + if @granularity == "word" + "ocrx_word" + elsif @granularity == "line" + "ocr_line" + elsif @granularity == "paragraph" + "ocr_par" + else + "" + end + end + + def resources + @hocr.xpath(".//*[contains(@class, '#{@selector}')]").map do |chunk| + text = chunk.text().gsub("\n", ' ').squeeze(' ').strip + if !text.empty? + title = chunk['title'] + title_parts = title.split('; ') + xywh = '0,0,0,0' + title_parts.each do |title_part| + if title_part.include?('bbox') + match_data = /bbox\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/.match title_part + x = match_data[1].to_i + y = match_data[2].to_i + x1 = match_data[3].to_i + y1 = match_data[4].to_i + w = x1 - x + h = y1 - y + xywh = "#{x},#{y},#{w},#{h}" + end + end + annotation(text, xywh) + end + end.compact + end + + def annotation_list + { + :"@context" => "http://iiif.io/api/presentation/2/context.json", + :"@id" => annotation_list_id, + :"@type" => "sc:AnnotationList", + :"@label" => "OCR text granularity of #{@granularity}", + resources: resources + } + end + + def annotation_list_id_base + "{{ '/' | absolute_url }}\ +img/derivatives/iiif/canvas/\ +#{@collection}/#{@identifier}-annotation-list-#{@granularity}.json" + + #File.join OKRACOKE_BASE_URL, @identifier + '-annotation-list-' + @granularity + end + + def annotation_list_id + annotation_list_id_base + '.json' + end + + def annotation(chars, xywh) + { + :"@id" => annotation_id(xywh), + :"@type" => "oa:Annotation", + motivation: "sc:painting", + resource: { + :"@type" => "cnt:ContentAsText", + format: "text/plain", + chars: chars + }, + # TODO: use canvas_url_template + on: on_canvas(xywh) + } + end + + def annotation_id(xywh) + File.join annotation_list_id_base, xywh + end + + def on_canvas(xywh) + manifest_canvas_on_xywh(@identifier, xywh) + end + + def to_json + annotation_list.to_json + end + + def id + @identifier + end + + def to_yaml + yaml_list = { + 'id' => @identifier, + 'label' => @identifier + 
'-annotation-list-' + @granularity, + 'target' => @canvas_uri, + 'resources' => [] + } + annotation_list[:resources].each do |resource| + yaml_list['resources'] << { + 'xywh' => resource[:@id].sub(/.*\/(.*)/, '\1'), + 'chars' => resource[:resource][:chars] + } + end + yaml_list.to_yaml + end + + def save + FileUtils.mkdir_p("./_data/annotations/#{@collection}/#{@collection}") + # TODO: handle item as distinct from collection + File.open("./_data/annotations/#{@collection}/#{@collection}/#{@collection}_#{@identifier}_ocr_#{@granularity}.yaml", 'w') do |file| + file.write(to_yaml) + end + end + +end diff --git a/lib/wax_tasks.rb b/lib/wax_tasks.rb index 0970828..427b470 100644 --- a/lib/wax_tasks.rb +++ b/lib/wax_tasks.rb @@ -13,6 +13,8 @@ require 'safe_yaml' # relative +require_relative 'wax_tasks/annotation' +require_relative 'wax_tasks/annotationlist' require_relative 'wax_tasks/asset' require_relative 'wax_tasks/collection' require_relative 'wax_tasks/config' diff --git a/lib/wax_tasks/annotation.rb b/lib/wax_tasks/annotation.rb new file mode 100644 index 0000000..3c8e3cd --- /dev/null +++ b/lib/wax_tasks/annotation.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +# +class Annotation + def initialize(annotationlist_id, canvas_id, xywh, + resource = {}, options = {}) + @annotationlist_id = annotationlist_id + @canvas_id = canvas_id + @xywh = xywh # TODO: validate xywh + @type = 'oa:Annotation' + @motivation = options[:motivation] || 'sc:painting' + @resource = + { + :@type => resource[:type] || 'cnt:ContentAsText', + chars: resource[:chars] || '', + format: resource[:format] || 'text/plain' + # TODO: extend or subclass this as needed for other kinds of annotations + } + @resource[:language] = resource[:language] unless resource[:language].nil? 
+ end + + def to_hash + { + :@context => 'http://iiif.io/api/presentation/2/context.json', + :@id => @annotationlist_id + '#' + @xywh, + :@type => @type, + motivation: @motivation, + resource: @resource, + on: @canvas_id + '#' + @xywh + } + end +end diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb new file mode 100644 index 0000000..6ae39e4 --- /dev/null +++ b/lib/wax_tasks/annotationlist.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +module WaxTasks + # + class AnnotationList + attr_reader :name + + def initialize(annotation_list) + # input is in format of annotation list yaml + @name = annotation_list['id'] + @label = annotation_list['label'] + @target = annotation_list['target'] + @type = 'sc:AnnotationList' + @resources = annotation_list["resources"].map do |resource| + { + :@type => resource['type'] || 'cnt:ContentAsText', + chars: resource['chars'] || '', + format: resource['format'] || 'text/plain', + xywh: resource['xywh'] || '' + # TODO: extend or subclass this as needed for other kinds of annotations + } + end + end + + def to_json + { + :@context => 'http://iiif.io/api/presentation/2/context.json', + :@id => 'id placeholder', + :@type => @type, + label: @label, + resources: @resources.map do |resource| + { + :@id => "id placeholder/#{resource[:xywh]}", + :@type => 'oa:Annotation', + motivation: 'sc:painting', + resource: { + :@type => resource[:@type], + format: resource[:format], + chars: resource[:chars] + }, + on: "#{@target}#xywh=#{resource[:xywh]}" + } + end + }.to_json + end + + def save + path = "#{dir}/#{Utils.slug(@pid)}.md" + if File.exist? 
path + 0 + else + FileUtils.mkdir_p File.dirname(path) + File.open(path, 'w') { |f| f.puts "#{@hash.to_yaml}---" } + 1 + end + end + end +end diff --git a/lib/wax_tasks/collection.rb b/lib/wax_tasks/collection.rb index e672c9a..819269e 100644 --- a/lib/wax_tasks/collection.rb +++ b/lib/wax_tasks/collection.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'collection/annotations' require_relative 'collection/images' require_relative 'collection/metadata' @@ -8,10 +9,12 @@ module WaxTasks class Collection attr_reader :name, :config, :ext, :search_fields, :page_source, :metadata_source, :imagedata_source, - :iiif_derivative_source, :simple_derivative_source + :iiif_derivative_source, :simple_derivative_source, + :annotationdata_source include Collection::Metadata include Collection::Images + include Collection::Annotations IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives' DEFAULT_VARIANTS = { 'thumbnail' => 250, 'fullwidth' => 1140 }.freeze @@ -27,6 +30,7 @@ def initialize(name, config, source, collections_dir, ext) @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source') @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source') @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif' + @annotationdata_source = Utils.safe_join source, '_data', config.dig('annotations', 'source') @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple' @search_fields = %w[pid label thumbnail permalink collection] @image_variants = image_variants diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb new file mode 100644 index 0000000..cbdaae3 --- /dev/null +++ b/lib/wax_tasks/collection/annotations.rb @@ -0,0 +1,137 @@ +# frozen_string_literal: true + +# +module WaxTasks + # + class Collection + # + module Annotations + # + # + def annotations_from_annotationdata + raise Error::MissingSource, "Cannot find annotation 
data source '#{@annotationdata_source}'" unless Dir.exist? @annotationdata_source + + records = records_from_metadata + + Dir.glob(Utils.safe_join(@annotationdata_source, '*')).map do |path| + item = WaxTasks::Item.new(path, {}) + item.record = records.find { |r| r.pid == item.pid } + item.annotation_config = @config.dig 'annotations' + warn Rainbow("\nCould not find record in #{@annotationdata_source} for image item #{path}.\n").orange if item.record.nil? + item + end.compact + end + + # + # + def write_annotations(dir) + puts Rainbow("Generating annotations for collection '#{@name}'").cyan + bar = ProgressBar.new(annotations_from_annotationdata.length) + bar.write + annotations_from_annotationdata.map do |item| + item.annotations.each do |p| + path = "#{Utils.safe_join dir, File.basename(p, '.*')}.json" + # img/derivatives/iiif/annotation/test_collection_0_ocr_paragraph.json + FileUtils.mkdir_p File.dirname(path) + next if File.exist? path + + # load yaml, write json + annotationlist = WaxTasks::AnnotationList.new(YAML.load_file(p, safe: true)) + File.write(path, "---\nlayout: none\n---\n#{annotationlist.to_json}\n") + + # add to manifest + # TODO: this should be all done in wax_iiif, really, though + # the workflow sequencing that implies needs to be thought out + + collection_dir_name = File.basename(@annotationdata_source) + manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' + raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(raw_json) + canvas_id = "#{collection_dir_name}_#{annotationlist.name}" + + this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| + canvas['@id'] == + "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" + end + + # TODO: allow multiple annotationlists + + if this_canvas.dig('otherContent', 0, '@id') == + "{{ '/' | absolute_url }}#{path}" + puts "AnnotationList #{canvas_id} already linked in Manifest" + else + 
this_canvas['otherContent'] = [ + { + '@id' => "{{ '/' | absolute_url }}#{path}", + '@type' => 'sc:AnnotationList' + } + ] + File.open(manifest_path, 'w') { |f| f.write("#{raw_yaml}#{manifest.to_json}") } + end + end + # TODO: do we want to update the item-level csv? + + bar.increment! + bar.write + item + end.flat_map(&:record).compact + end + + # + # + def iiif_builder(dir) + build_opts = { + base_url: "{{ '/' | absolute_url }}#{dir}", + output_dir: dir, + collection_label: @name + } + WaxIiif::Builder.new(build_opts) + end + + # + # + def add_font_matter_to_json_files(dir) + Dir.glob("#{dir}/**/*.json").each do |f| + Utils.add_yaml_front_matter_to_file f + end + end + + # + # + def add_iiif_results_to_records(records, manifests) + records.map do |record| + next nil if record.nil? + + manifest = manifests.find { |m| m.base_id == record.pid } + next record if manifest.nil? + + json = JSON.parse manifest.to_json + @image_variants.each do |k, _v| + value = json.dig k + record.set k, "/#{Utils.content_clean(value)}" unless value.nil? 
+ end + + record.set 'manifest', "/#{Utils.content_clean(manifest.id)}" + record + end.compact + end + + # + # + def write_iiif_derivatives(dir) + items = items_from_imagedata + iiif_data = items.map(&:iiif_image_records).flatten + builder = iiif_builder(dir) + + builder.load iiif_data + + puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan + builder.process_data + records = items.map(&:record).compact + + add_font_matter_to_json_files dir + add_iiif_results_to_records records, builder.manifests + end + end + end +end diff --git a/lib/wax_tasks/item.rb b/lib/wax_tasks/item.rb index 7b79e06..4275eca 100644 --- a/lib/wax_tasks/item.rb +++ b/lib/wax_tasks/item.rb @@ -3,7 +3,7 @@ module WaxTasks # class Item - attr_accessor :record, :iiif_config + attr_accessor :record, :iiif_config, :annotation_config attr_reader :pid # @@ -60,6 +60,13 @@ def simple_derivatives @assets.map(&:simple_derivatives).flatten end + # + # + def annotations + # TODO: integrate this with assets handling? + Dir.glob("#{@path}/*").sort + end + # # def logo diff --git a/lib/wax_tasks/site.rb b/lib/wax_tasks/site.rb index 7c5852b..f56f8eb 100644 --- a/lib/wax_tasks/site.rb +++ b/lib/wax_tasks/site.rb @@ -82,5 +82,18 @@ def generate_derivatives(name, type) collection.update_metadata records puts Rainbow("\nDone ✔").green end + + # + # + def generate_annotations(name) + collection = @config.find_collection name + + raise WaxTasks::Error::InvalidCollection if collection.nil? + + output_dir = Utils.safe_join @config.source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif/annotation' + records = collection.write_annotations output_dir + collection.update_metadata records + puts Rainbow("\nDone ✔").green + end end end diff --git a/spec/sample_hocr/img_item_1.hocr b/spec/sample_hocr/img_item_1.hocr new file mode 100644 index 0000000..d8d244f --- /dev/null +++ b/spec/sample_hocr/img_item_1.hocr @@ -0,0 +1,23 @@ + + + + + + + + + + +
+
+

+ If the ax + + falls the + +

+
+
+ + diff --git a/spec/sample_hocr/manifest.json b/spec/sample_hocr/manifest.json new file mode 100644 index 0000000..4363a0d --- /dev/null +++ b/spec/sample_hocr/manifest.json @@ -0,0 +1,49 @@ +--- +layout: none +--- +{ + "@context": "http://iiif.io/api/presentation/2/context.json", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/test_collection/manifest.json", + "@type": "sc:Manifest", + "label": "test_collection", + "thumbnail": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/250,/0/default.jpg", + "viewingDirection": "left-to-right", + "viewingHint": "individuals", + "sequences": [ + { + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/sequence/test_collection_img_item_1.json", + "@type": "sc:Sequence", + "canvases": [ + { + "@type": "sc:Canvas", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json", + "label": "Page 1", + "width": 2600, + "height": 1697, + "thumbnail": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/250,/0/default.jpg", + "images": [ + { + "@type": "oa:Annotation", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1.json", + "motivation": "sc:painting", + "resource": { + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/full/0/default.jpg", + "@type": "dcterms:Image", + "format": "image/jpeg", + "service": { + "@context": "http://iiif.io/api/image/2/context.json", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1", + "profile": "http://iiif.io/api/image/2/level0.json" + }, + "width": 2600, + "height": 1697 + }, + "on": "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" + } + ] + } + ] + } + ], + "full": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/full/0/default.jpg" +} \ No newline at end of file diff --git 
a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml new file mode 100644 index 0000000..c6fd088 --- /dev/null +++ b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml @@ -0,0 +1,7 @@ +--- +id: 'img_item_1' +label: img_item_1-annotation-list-paragraph +target: "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" +resources: +- xywh: '20,668,171,100' + chars: If the ax falls the diff --git a/spec/sample_site/_config.yml b/spec/sample_site/_config.yml index 89f3bba..247f73f 100644 --- a/spec/sample_site/_config.yml +++ b/spec/sample_site/_config.yml @@ -10,6 +10,8 @@ collections: source: valid.csv images: source: 'images/test_collection' + annotations: + source: 'annotations/test_collection' json_collection: layout: default.html metadata: diff --git a/spec/wax_tasks/annotations_spec.rb b/spec/wax_tasks/annotations_spec.rb new file mode 100644 index 0000000..05828af --- /dev/null +++ b/spec/wax_tasks/annotations_spec.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +describe WaxTasks::AnnotationList do + include_context 'shared' + + include_context 'shared' + let(:config) { site_from_config_file.config } + let(:source) { config.source } + let(:collections_dir) { config.collections_dir } + + before(:all) do + Test.reset + end + + # + # =================================================== + # ANNOTATION.NEW + # =================================================== + # + describe '#new' do + context 'generates json' do + it 'works' do + byebug + expect { WaxTasks::AnnotationList.new({}) }.not_to raise_error + end + end + end +end diff --git a/spec/wax_tasks/hocr_spec.rb b/spec/wax_tasks/hocr_spec.rb new file mode 100644 index 0000000..fbefe7f --- /dev/null +++ b/spec/wax_tasks/hocr_spec.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative '../../lib/tasks/import/hocr.rb' + +describe HocrOpenAnnotationCreator do + include_context 
'shared' + + before(:all) do + Test.reset + end + + # + # =================================================== + # HocrOpenAnnotationCreator.NEW + # =================================================== + # + describe '#new' do + include_context 'shared' + + context 'parses hocr file not to raise error' do + it 'runs without errors' do + expect { HocrOpenAnnotationCreator.new({ + hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", + collection: 'test_collection', + canvas: 'img_item_1', + granularity: 'paragraph' + }) }.not_to raise_error + end + end + end + + describe '#save' do + include_context 'shared' + + context 'hocr yaml file' do + hocr = HocrOpenAnnotationCreator.new({ + hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", + collection: 'test_collection', + canvas: 'img_item_1', + granularity: 'paragraph' + }) + + it 'captures chars correctly' do + expect(hocr.resources.first[:resource][:chars]).to eq('If the ax falls the') + end + + it 'captures target correctly' do + expect(hocr.resources.first[:on]).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") + end + end + end + +end diff --git a/spec/wax_tasks/site_spec.rb b/spec/wax_tasks/site_spec.rb index da4f427..70219a2 100644 --- a/spec/wax_tasks/site_spec.rb +++ b/spec/wax_tasks/site_spec.rb @@ -272,4 +272,46 @@ end end end + + # + # =================================================== + # SITE.GENERATE_ANNOTATIONLISTS (NAME) + # =================================================== + # + + describe '#generate_annotationlists' do + before(:each) do + FileUtils.mkdir_p "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + FileUtils.mkdir_p "#{BUILD}/img/derivatives/iiif/test_collection/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/manifest.json"), "#{BUILD}/img/derivatives/iiif/test_collection/" + end + + # TODO: 
mock or stub the annotation and manifest files, break up this block + context 'when generates sample annotationlist' do + it 'generates annotationlist and updates manifest' do + + expect { site_from_config_file.generate_annotations('csv_collection') }.not_to raise_error + + json_file = "#{BUILD}/img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" + expect(File).to exist(json_file) + raw_yaml, raw_json = File.read(json_file).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + annotation = JSON.parse(raw_json)['resources'].first + expect(annotation['on']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") + expect(annotation['resource']['chars']).to eq('If the ax falls the') + + manifest_path = "#{BUILD}/img/derivatives/iiif/test_collection/manifest.json" + raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(raw_json) + expect(manifest['sequences'][0]['canvases'][0]['otherContent']).not_to be_nil + expect(manifest['sequences'][0]['canvases'][0]['otherContent'][0]['@id']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json") + end + end + + after(:each) do + FileUtils.rm Dir.glob("#{BUILD}/_data/annotations/test_collection/dir_imgs_item/*.yaml") + FileUtils.rm Dir.glob("#{BUILD}/img/derivatives/iiif/test_collection/manifest.json") + end + end + end From dff3073752fbac0d396e7f57e0a97a4a1e18744f Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Thu, 15 Oct 2020 14:34:44 -0600 Subject: [PATCH 2/9] Clean up and refactor --- lib/wax_tasks/collection/annotations.rb | 105 ++++++------------------ 1 file changed, 26 insertions(+), 79 deletions(-) diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb index cbdaae3..ef95a6f 100644 --- a/lib/wax_tasks/collection/annotations.rb +++ b/lib/wax_tasks/collection/annotations.rb @@ -39,36 +39,9 @@ def 
write_annotations(dir) annotationlist = WaxTasks::AnnotationList.new(YAML.load_file(p, safe: true)) File.write(path, "---\nlayout: none\n---\n#{annotationlist.to_json}\n") - # add to manifest - # TODO: this should be all done in wax_iiif, really, though - # the workflow sequencing that implies needs to be thought out - - collection_dir_name = File.basename(@annotationdata_source) - manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' - raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] - manifest = JSON.parse(raw_json) - canvas_id = "#{collection_dir_name}_#{annotationlist.name}" - - this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| - canvas['@id'] == - "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" - end - - # TODO: allow multiple annotationlists - - if this_canvas.dig('otherContent', 0, '@id') == - "{{ '/' | absolute_url }}#{path}" - puts "AnnotationList #{canvas_id} already linked in Manifest" - else - this_canvas['otherContent'] = [ - { - '@id' => "{{ '/' | absolute_url }}#{path}", - '@type' => 'sc:AnnotationList' - } - ] - File.open(manifest_path, 'w') { |f| f.write("#{raw_yaml}#{manifest.to_json}") } - end + add_annotationlist_to_manfest(dir, annotationlist, path) end + # TODO: do we want to update the item-level csv? bar.increment! 
@@ -79,58 +52,32 @@ def write_annotations(dir) # # - def iiif_builder(dir) - build_opts = { - base_url: "{{ '/' | absolute_url }}#{dir}", - output_dir: dir, - collection_label: @name - } - WaxIiif::Builder.new(build_opts) - end - - # - # - def add_font_matter_to_json_files(dir) - Dir.glob("#{dir}/**/*.json").each do |f| - Utils.add_yaml_front_matter_to_file f + def add_annotationlist_to_manfest(dir, annotationlist, path) + collection_dir_name = File.basename(@annotationdata_source) + manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' + raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(raw_json) + canvas_id = "#{collection_dir_name}_#{annotationlist.name}" + + this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| + canvas['@id'] == + "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" end - end - - # - # - def add_iiif_results_to_records(records, manifests) - records.map do |record| - next nil if record.nil? - manifest = manifests.find { |m| m.base_id == record.pid } - next record if manifest.nil? - - json = JSON.parse manifest.to_json - @image_variants.each do |k, _v| - value = json.dig k - record.set k, "/#{Utils.content_clean(value)}" unless value.nil? 
- end - - record.set 'manifest', "/#{Utils.content_clean(manifest.id)}" - record - end.compact - end - - # - # - def write_iiif_derivatives(dir) - items = items_from_imagedata - iiif_data = items.map(&:iiif_image_records).flatten - builder = iiif_builder(dir) - - builder.load iiif_data - - puts Rainbow("Generating IIIF derivatives for collection '#{@name}'\nThis might take awhile.").cyan - builder.process_data - records = items.map(&:record).compact - - add_font_matter_to_json_files dir - add_iiif_results_to_records records, builder.manifests + # TODO: allow multiple annotationlists + # TODO: this has to run for annotationlists which are created as json in img/derivatives/iiif/annotations + if this_canvas.dig('otherContent', 0, '@id') == + "{{ '/' | absolute_url }}#{path}" + puts "AnnotationList #{canvas_id} already linked in Manifest" + else + this_canvas['otherContent'] = [ + { + '@id' => "{{ '/' | absolute_url }}#{path}", + '@type' => 'sc:AnnotationList' + } + ] + File.open(manifest_path, 'w') { |f| f.write("#{raw_yaml}#{manifest.to_json}") } + end end end end From 7949e3940f72fb520bc722cd4bea04825a4edec9 Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Fri, 16 Oct 2020 17:02:51 -0600 Subject: [PATCH 3/9] wip --- lib/tasks/annotations.rake | 23 +++++++++++++++ lib/tasks/import/hocr.rb | 28 ++++++++++++------- lib/wax_tasks/annotationlist.rb | 5 +++- lib/wax_tasks/collection/annotations.rb | 18 ++++++++---- ...t_collection_img_item_1_ocr_paragraph.yaml | 4 ++- spec/wax_tasks/site_spec.rb | 6 ++-- 6 files changed, 64 insertions(+), 20 deletions(-) diff --git a/lib/tasks/annotations.rake b/lib/tasks/annotations.rake index fc223d3..eac9520 100644 --- a/lib/tasks/annotations.rake +++ b/lib/tasks/annotations.rake @@ -13,4 +13,27 @@ namespace :wax do site = WaxTasks::Site.new args.each { |a| site.generate_annotations(a) } end + + task :updatemanifest do + args = ARGV.drop(1).each { |a| task a.to_sym } + args.reject! { |a| a.start_with? 
'-' } + + raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:updatemanifest'").magenta if args.empty? + + site = WaxTasks::Site.new + args.each do |collection_name| + collection = site.collections.find { |c| c.name == collection_name } + annotationdata_source = collection.annotationdata_source + + annotationlists = Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort + + byebug + + # dir: img/derivatives/iiif/annotation + # annotationlist: + # path: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json + + site.add_annotationlist_to_manfest(dir, annotationlists, path) + end + end end diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb index 897fcbf..446b9d2 100644 --- a/lib/tasks/import/hocr.rb +++ b/lib/tasks/import/hocr.rb @@ -1,29 +1,33 @@ -require 'nokogiri' require 'addressable/template' require 'json' +require 'nokogiri' require 'yaml' + require 'byebug' -# adapted from Okracoke: +# adapted from Okracoke: # https://github.com/NCSU-Libraries/ocracoke/blob/master/app/processing_helpers/hocr_open_annotation_creator.rb - +module WaxTasks class HocrOpenAnnotationCreator def initialize(args) - @canvas_uri = "{{ '/' | absolute_url }}\ -img/derivatives/iiif/canvas/\ -#{args[:collection]}_#{args[:canvas]}.json" - @hocr = File.open(args[:hocr_path]){ |f| Nokogiri::XML(f) } @collection = args[:collection] @identifier = args[:canvas] @granularity = args[:granularity] + @uri_root = "{{ '/' | absolute_url }}\img/derivatives/iiif" + @canvas_root = "#{@collection}_#{@identifier}" + @label = "#{@canvas_root}_ocr_#{@granularity}" + + @canvas_uri = "#{@uri_root}/canvas/#{@canvas_root}.json" + @list_uri = "#{@uri_root}/annotation/#{@label}.json" + @selector = get_selector end def manifest_canvas_on_xywh(id, xywh) - @canvas_uri + "#xywh=#{xywh}" + "#{@canvas_uri}#xywh=#{xywh}" end def get_selector @@ -67,7 +71,7 @@ def annotation_list :"@context" => 
"http://iiif.io/api/presentation/2/context.json", :"@id" => annotation_list_id, :"@type" => "sc:AnnotationList", - :"@label" => "OCR text granularity of #{@granularity}", + :"@label" => "OCR text with granularity of #{@granularity}", resources: resources } end @@ -117,8 +121,10 @@ def id def to_yaml yaml_list = { + 'uri' => @list_uri, + 'collection' => @collection, 'id' => @identifier, - 'label' => @identifier + '-annotation-list-' + @granularity, + 'label' => @label, 'target' => @canvas_uri, 'resources' => [] } @@ -134,9 +140,11 @@ def to_yaml def save FileUtils.mkdir_p("./_data/annotations/#{@collection}/#{@collection}") # TODO: handle item as distinct from collection + # TODO: do not overwrite existing file without asking File.open("./_data/annotations/#{@collection}/#{@collection}/#{@collection}_#{@identifier}_ocr_#{@granularity}.yaml", 'w') do |file| file.write(to_yaml) end end end +end diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb index 6ae39e4..c5292ab 100644 --- a/lib/wax_tasks/annotationlist.rb +++ b/lib/wax_tasks/annotationlist.rb @@ -3,13 +3,16 @@ module WaxTasks # class AnnotationList - attr_reader :name + attr_reader :name, :label def initialize(annotation_list) # input is in format of annotation list yaml + @uri = annotation_list['uri'] + @collection = annotation_list['collection'] @name = annotation_list['id'] @label = annotation_list['label'] @target = annotation_list['target'] + @type = 'sc:AnnotationList' @resources = annotation_list["resources"].map do |resource| { diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb index ef95a6f..7ee55bc 100644 --- a/lib/wax_tasks/collection/annotations.rb +++ b/lib/wax_tasks/collection/annotations.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true - +require 'byebug' # module WaxTasks # @@ -39,7 +39,7 @@ def write_annotations(dir) annotationlist = WaxTasks::AnnotationList.new(YAML.load_file(p, safe: true)) File.write(path, "---\nlayout: 
none\n---\n#{annotationlist.to_json}\n") - add_annotationlist_to_manfest(dir, annotationlist, path) + add_annotationlist_to_manifest(annotationlist, path) end # TODO: do we want to update the item-level csv? @@ -52,11 +52,17 @@ def write_annotations(dir) # # - def add_annotationlist_to_manfest(dir, annotationlist, path) + def add_annotationlist_to_manifest(annotationlist, path) + # dir: img/derivatives/iiif/annotation + # annotationlist: + # path: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json +byebug + dir = 'img/derivatives/iiif/annotation' collection_dir_name = File.basename(@annotationdata_source) + byebug manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' - raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] - manifest = JSON.parse(raw_json) + manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(manifest_body) canvas_id = "#{collection_dir_name}_#{annotationlist.name}" this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| @@ -76,7 +82,7 @@ def add_annotationlist_to_manfest(dir, annotationlist, path) '@type' => 'sc:AnnotationList' } ] - File.open(manifest_path, 'w') { |f| f.write("#{raw_yaml}#{manifest.to_json}") } + File.open(manifest_path, 'w') { |f| f.write("#{manifest_front_matter}#{manifest.to_json}") } end end end diff --git a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml index c6fd088..ce490f7 100644 --- a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml +++ b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml @@ -1,6 +1,8 @@ --- +uri: "{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" +collection: test_collection id: 'img_item_1' -label: img_item_1-annotation-list-paragraph +label: 
test_collection_img_item_1_ocr_paragraph target: "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" resources: - xywh: '20,668,171,100' diff --git a/spec/wax_tasks/site_spec.rb b/spec/wax_tasks/site_spec.rb index 70219a2..a0ec701 100644 --- a/spec/wax_tasks/site_spec.rb +++ b/spec/wax_tasks/site_spec.rb @@ -290,16 +290,18 @@ # TODO: mock or stub the annotation and manifest files, break up this block context 'when generates sample annotationlist' do it 'generates annotationlist and updates manifest' do - + byebug expect { site_from_config_file.generate_annotations('csv_collection') }.not_to raise_error - +byebug json_file = "#{BUILD}/img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" expect(File).to exist(json_file) + raw_yaml, raw_json = File.read(json_file).match(/(---\n.+?\n---\n)(.*)/m)[1..2] annotation = JSON.parse(raw_json)['resources'].first expect(annotation['on']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") expect(annotation['resource']['chars']).to eq('If the ax falls the') + # link to annotation list has been added to manifest manifest_path = "#{BUILD}/img/derivatives/iiif/test_collection/manifest.json" raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] manifest = JSON.parse(raw_json) From 789910bfa0ab5266db3103d5e8637868176e718b Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Sun, 18 Oct 2020 15:26:53 -0600 Subject: [PATCH 4/9] Annotation processing finished --- lib/tasks/annotations.rake | 18 ++-- lib/tasks/import/hocr.rb | 2 +- lib/wax_tasks/annotationlist.rb | 8 +- lib/wax_tasks/collection/annotations.rb | 101 +++++++++++++----- lib/wax_tasks/site.rb | 1 + ...t_collection_img_item_1_ocr_paragraph.yaml | 2 +- spec/wax_tasks/annotations_spec.rb | 33 ++++-- spec/wax_tasks/site_spec.rb | 19 ++-- 8 files changed, 118 insertions(+), 66 deletions(-) diff --git a/lib/tasks/annotations.rake 
b/lib/tasks/annotations.rake index eac9520..08e1cd1 100644 --- a/lib/tasks/annotations.rake +++ b/lib/tasks/annotations.rake @@ -3,7 +3,7 @@ require 'wax_tasks' namespace :wax do - desc 'generate annotationlists from local yaml files' + desc 'generate annotationlists from local yaml/json files' task :annotations do args = ARGV.drop(1).each { |a| task a.to_sym } args.reject! { |a| a.start_with? '-' } @@ -21,19 +21,17 @@ namespace :wax do raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:updatemanifest'").magenta if args.empty? site = WaxTasks::Site.new + config = WaxTasks.config_from_file + + dir = 'img/derivatives/iiif/annotation' + args.each do |collection_name| collection = site.collections.find { |c| c.name == collection_name } annotationdata_source = collection.annotationdata_source - annotationlists = Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort - - byebug - - # dir: img/derivatives/iiif/annotation - # annotationlist: - # path: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json - - site.add_annotationlist_to_manfest(dir, annotationlists, path) + collection.add_annotationlists_to_manifest( + Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort + ) end end end diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb index 446b9d2..33a9d72 100644 --- a/lib/tasks/import/hocr.rb +++ b/lib/tasks/import/hocr.rb @@ -123,7 +123,7 @@ def to_yaml yaml_list = { 'uri' => @list_uri, 'collection' => @collection, - 'id' => @identifier, + 'canvas' => @identifier, 'label' => @label, 'target' => @canvas_uri, 'resources' => [] diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb index c5292ab..7cb2410 100644 --- a/lib/wax_tasks/annotationlist.rb +++ b/lib/wax_tasks/annotationlist.rb @@ -3,13 +3,13 @@ module WaxTasks # class AnnotationList - attr_reader :name, :label + attr_reader :canvas, :label def initialize(annotation_list) # input is in format of 
annotation list yaml @uri = annotation_list['uri'] @collection = annotation_list['collection'] - @name = annotation_list['id'] + @canvas = annotation_list['canvas'] @label = annotation_list['label'] @target = annotation_list['target'] @@ -23,17 +23,17 @@ def initialize(annotation_list) # TODO: extend or subclass this as needed for other kinds of annotations } end + byebug end def to_json { :@context => 'http://iiif.io/api/presentation/2/context.json', - :@id => 'id placeholder', + :@id => @uri, :@type => @type, label: @label, resources: @resources.map do |resource| { - :@id => "id placeholder/#{resource[:xywh]}", :@type => 'oa:Annotation', motivation: 'sc:painting', resource: { diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb index 7ee55bc..40eaa0d 100644 --- a/lib/wax_tasks/collection/annotations.rb +++ b/lib/wax_tasks/collection/annotations.rb @@ -6,6 +6,14 @@ module WaxTasks class Collection # module Annotations + # + # + def get_source_type(source_path) + source_type = File.extname source_path # '.yaml' or '.json' + source_type = '.yaml' if source_type == '.yml' + source_type + end + # # def annotations_from_annotationdata @@ -29,17 +37,26 @@ def write_annotations(dir) bar = ProgressBar.new(annotations_from_annotationdata.length) bar.write annotations_from_annotationdata.map do |item| - item.annotations.each do |p| - path = "#{Utils.safe_join dir, File.basename(p, '.*')}.json" + item.annotations.each do |source_path| + dest_path = "#{Utils.safe_join dir, File.basename(source_path, '.*')}.json" # img/derivatives/iiif/annotation/test_collection_0_ocr_paragraph.json - FileUtils.mkdir_p File.dirname(path) - next if File.exist? path + FileUtils.mkdir_p File.dirname(dest_path) + next if File.exist? 
dest_path - # load yaml, write json - annotationlist = WaxTasks::AnnotationList.new(YAML.load_file(p, safe: true)) - File.write(path, "---\nlayout: none\n---\n#{annotationlist.to_json}\n") + source_type = get_source_type source_path + case source_type + when '.yaml' + # load yaml, write json + annotationlist = WaxTasks::AnnotationList.new(SafeYAML.load_file(source_path)) + File.write(dest_path, "---\nlayout: none\n---\n#{annotationlist.to_json}\n") - add_annotationlist_to_manifest(annotationlist, path) + # add_annotationlist_to_manifest(annotationlist, dest_path) + when '.json' + # TODO: handle json input - we assume it has uris in final jekyll-ready form + # e.g. {{ '/' | absolute_url }}img/derivatives/iiif/annotation/recipebook_002_clippings.json + + FileUtils.cp source_path, dest_path + end end # TODO: do we want to update the item-level csv? @@ -52,37 +69,69 @@ def write_annotations(dir) # # - def add_annotationlist_to_manifest(annotationlist, path) - # dir: img/derivatives/iiif/annotation - # annotationlist: - # path: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json -byebug + def add_annotationlists_to_manifest(annotationlists) dir = 'img/derivatives/iiif/annotation' collection_dir_name = File.basename(@annotationdata_source) - byebug + manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] manifest = JSON.parse(manifest_body) - canvas_id = "#{collection_dir_name}_#{annotationlist.name}" + byebug + annotationlists.each do |list_path| + source_type = get_source_type list_path + list = nil + canvas_id = nil + + case source_type + when '.yaml' + list = SafeYAML.load_file(list_path) + canvas_id = "#{list['collection']}_#{list['canvas']}" + when '.json' + # TODO: encapsulate this yaml/json handling in a class + list_front_matter, list_body = File.read(list_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + 
list_yaml = YAML.load list_front_matter + list = JSON.parse(list_body) + canvas_id = "#{list_yaml['collection']}_#{list_yaml['canvas']}" + end + add_annotationlist_to_manifest(manifest, list, canvas_id) + end + + # TODO : save only if changed + File.open(manifest_path, 'w') do |f| + f.write("#{manifest_front_matter}#{manifest.to_json}") + end + + end + + # + # + def add_annotationlist_to_manifest(manifest, annotationlist, canvas_id) + # dir: img/derivatives/iiif/annotation + # annotationlist: + # annotationlist_uri: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json + dir = 'img/derivatives/iiif/annotation' + annotationlist_uri = annotationlist['uri'] + annotationlist_uri ||= annotationlist['@id'] + + # TODO: deal with multiple sequences, possibly containing same canvas (?) this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| canvas['@id'] == "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" end - - # TODO: allow multiple annotationlists +byebug # TODO: this has to run for annotationlists which are created as json in img/derivatives/iiif/annotations - if this_canvas.dig('otherContent', 0, '@id') == - "{{ '/' | absolute_url }}#{path}" + this_canvas['otherContent'] ||= [] + + # TODO: remove entries for annotationlists that have been deleted + + if this_canvas['otherContent'].find { |c| c['@id'] == annotationlist_uri } puts "AnnotationList #{canvas_id} already linked in Manifest" else - this_canvas['otherContent'] = [ - { - '@id' => "{{ '/' | absolute_url }}#{path}", - '@type' => 'sc:AnnotationList' - } - ] - File.open(manifest_path, 'w') { |f| f.write("#{manifest_front_matter}#{manifest.to_json}") } + this_canvas['otherContent'] << { + '@id' => annotationlist_uri, + '@type' => 'sc:AnnotationList' + } end end end diff --git a/lib/wax_tasks/site.rb b/lib/wax_tasks/site.rb index f56f8eb..ae0bf41 100644 --- a/lib/wax_tasks/site.rb +++ b/lib/wax_tasks/site.rb @@ -92,6 +92,7 @@ def generate_annotations(name) 
output_dir = Utils.safe_join @config.source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif/annotation' records = collection.write_annotations output_dir + byebug collection.update_metadata records puts Rainbow("\nDone ✔").green end diff --git a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml index ce490f7..a0f4872 100644 --- a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml +++ b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml @@ -1,7 +1,7 @@ --- uri: "{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" collection: test_collection -id: 'img_item_1' +canvas: 'img_item_1' label: test_collection_img_item_1_ocr_paragraph target: "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" resources: diff --git a/spec/wax_tasks/annotations_spec.rb b/spec/wax_tasks/annotations_spec.rb index 05828af..9f4b91e 100644 --- a/spec/wax_tasks/annotations_spec.rb +++ b/spec/wax_tasks/annotations_spec.rb @@ -3,25 +3,36 @@ describe WaxTasks::AnnotationList do include_context 'shared' - include_context 'shared' - let(:config) { site_from_config_file.config } - let(:source) { config.source } - let(:collections_dir) { config.collections_dir } - before(:all) do Test.reset end # # =================================================== - # ANNOTATION.NEW + # ANNOTATION.UPDATEMANIFEST # =================================================== # - describe '#new' do - context 'generates json' do - it 'works' do - byebug - expect { WaxTasks::AnnotationList.new({}) }.not_to raise_error + describe '#updatemanifest' do + context 'updates manifest' do + it 'updates manifest' do + FileUtils.mkdir_p "#{BUILD}/img/derivatives/iiif/test_collection/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/manifest.json"), "#{BUILD}/img/derivatives/iiif/test_collection/" + FileUtils.mkdir_p 
"#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + + config = WaxTasks::Config.new(config || WaxTasks.config_from_file) + collection = config.find_collection 'csv_collection' + + collection.add_annotationlists_to_manifest( + Dir.glob("#{BUILD}/_data/annotations/test_collection/dir_imgs_item/*.{yaml,yml,json}").sort + ) + + manifest_path = "#{BUILD}/img/derivatives/iiif/test_collection/manifest.json" + raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(raw_json) + + expect(manifest['sequences'][0]['canvases'][0]['otherContent']).not_to be_nil + expect(manifest['sequences'][0]['canvases'][0]['otherContent'][0]['@id']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json") end end end diff --git a/spec/wax_tasks/site_spec.rb b/spec/wax_tasks/site_spec.rb index a0ec701..3934624 100644 --- a/spec/wax_tasks/site_spec.rb +++ b/spec/wax_tasks/site_spec.rb @@ -280,7 +280,7 @@ # describe '#generate_annotationlists' do - before(:each) do + before(:example) do FileUtils.mkdir_p "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" FileUtils.mkdir_p "#{BUILD}/img/derivatives/iiif/test_collection/" @@ -289,28 +289,21 @@ # TODO: mock or stub the annotation and manifest files, break up this block context 'when generates sample annotationlist' do - it 'generates annotationlist and updates manifest' do - byebug + it 'runs without error' do expect { site_from_config_file.generate_annotations('csv_collection') }.not_to raise_error -byebug + end + + it 'generates annotationlist' do json_file = "#{BUILD}/img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" expect(File).to exist(json_file) - raw_yaml, 
raw_json = File.read(json_file).match(/(---\n.+?\n---\n)(.*)/m)[1..2] annotation = JSON.parse(raw_json)['resources'].first expect(annotation['on']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") expect(annotation['resource']['chars']).to eq('If the ax falls the') - - # link to annotation list has been added to manifest - manifest_path = "#{BUILD}/img/derivatives/iiif/test_collection/manifest.json" - raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] - manifest = JSON.parse(raw_json) - expect(manifest['sequences'][0]['canvases'][0]['otherContent']).not_to be_nil - expect(manifest['sequences'][0]['canvases'][0]['otherContent'][0]['@id']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json") end end - after(:each) do + after(:example) do FileUtils.rm Dir.glob("#{BUILD}/_data/annotations/test_collection/dir_imgs_item/*.yaml") FileUtils.rm Dir.glob("#{BUILD}/img/derivatives/iiif/test_collection/manifest.json") end From 11be622167d3fc12847d0d7a99149864a726bedc Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Sun, 18 Oct 2020 16:41:39 -0600 Subject: [PATCH 5/9] Remove byebug --- lib/tasks/import.rake | 2 -- lib/tasks/import/hocr.rb | 2 -- lib/wax_tasks/annotationlist.rb | 1 - lib/wax_tasks/collection/annotations.rb | 6 ++---- lib/wax_tasks/site.rb | 1 - spec/wax_tasks/site_spec.rb | 2 -- 6 files changed, 2 insertions(+), 12 deletions(-) diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index dca297d..5792816 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -2,8 +2,6 @@ require_relative './import/hocr.rb' -require 'byebug' - namespace :wax do namespace :import do task :hocr, [:hocr_path, :collection, :canvas, :granularity] do |_t, args| diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb index 33a9d72..fdaac62 100644 --- a/lib/tasks/import/hocr.rb +++ b/lib/tasks/import/hocr.rb 
@@ -3,8 +3,6 @@ require 'nokogiri' require 'yaml' -require 'byebug' - # adapted from Okracoke: # https://github.com/NCSU-Libraries/ocracoke/blob/master/app/processing_helpers/hocr_open_annotation_creator.rb module WaxTasks diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb index 7cb2410..b5f4358 100644 --- a/lib/wax_tasks/annotationlist.rb +++ b/lib/wax_tasks/annotationlist.rb @@ -23,7 +23,6 @@ def initialize(annotation_list) # TODO: extend or subclass this as needed for other kinds of annotations } end - byebug end def to_json diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb index 40eaa0d..b9697c8 100644 --- a/lib/wax_tasks/collection/annotations.rb +++ b/lib/wax_tasks/collection/annotations.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -require 'byebug' # module WaxTasks # @@ -76,7 +75,7 @@ def add_annotationlists_to_manifest(annotationlists) manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] manifest = JSON.parse(manifest_body) - byebug + annotationlists.each do |list_path| source_type = get_source_type list_path @@ -119,8 +118,7 @@ def add_annotationlist_to_manifest(manifest, annotationlist, canvas_id) canvas['@id'] == "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" end -byebug - # TODO: this has to run for annotationlists which are created as json in img/derivatives/iiif/annotations + this_canvas['otherContent'] ||= [] # TODO: remove entries for annotationlists that have been deleted diff --git a/lib/wax_tasks/site.rb b/lib/wax_tasks/site.rb index ae0bf41..f56f8eb 100644 --- a/lib/wax_tasks/site.rb +++ b/lib/wax_tasks/site.rb @@ -92,7 +92,6 @@ def generate_annotations(name) output_dir = Utils.safe_join @config.source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif/annotation' records = collection.write_annotations output_dir - byebug 
collection.update_metadata records puts Rainbow("\nDone ✔").green end diff --git a/spec/wax_tasks/site_spec.rb b/spec/wax_tasks/site_spec.rb index 3934624..a6066e4 100644 --- a/spec/wax_tasks/site_spec.rb +++ b/spec/wax_tasks/site_spec.rb @@ -283,8 +283,6 @@ before(:example) do FileUtils.mkdir_p "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" - FileUtils.mkdir_p "#{BUILD}/img/derivatives/iiif/test_collection/" - FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/manifest.json"), "#{BUILD}/img/derivatives/iiif/test_collection/" end # TODO: mock or stub the annotation and manifest files, break up this block From 430bdcdaf967077ff29758aa6d622f273134b824 Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Fri, 23 Oct 2020 16:19:25 -0600 Subject: [PATCH 6/9] Fix hocr_spec.rb --- spec/wax_tasks/hocr_spec.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/wax_tasks/hocr_spec.rb b/spec/wax_tasks/hocr_spec.rb index fbefe7f..fa4d2c3 100644 --- a/spec/wax_tasks/hocr_spec.rb +++ b/spec/wax_tasks/hocr_spec.rb @@ -2,7 +2,7 @@ require_relative '../../lib/tasks/import/hocr.rb' -describe HocrOpenAnnotationCreator do +describe WaxTasks::HocrOpenAnnotationCreator do include_context 'shared' before(:all) do @@ -19,7 +19,7 @@ context 'parses hocr file not to raise error' do it 'runs without errors' do - expect { HocrOpenAnnotationCreator.new({ + expect { WaxTasks::HocrOpenAnnotationCreator.new({ hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", collection: 'test_collection', canvas: 'img_item_1', @@ -33,7 +33,7 @@ include_context 'shared' context 'hocr yaml file' do - hocr = HocrOpenAnnotationCreator.new({ + hocr = WaxTasks::HocrOpenAnnotationCreator.new({ hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", collection: 'test_collection', canvas: 'img_item_1', From 53adedfda79bdb9f2ba11ac125bc7dd8a76b3e62 Mon Sep 17 00:00:00 
2001 From: Peter Binkley Date: Mon, 26 Oct 2020 10:51:48 -0600 Subject: [PATCH 7/9] Add task-level description to README --- README.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 64af620..314d475 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,9 @@ collections: metadata: source: 'objects.csv' # path to the metadata file, must be within '_data' images: - source 'source_images/objects' # path to the directory of source images, must be within '_data' + source: 'source_images/objects' # path to the directory of source images, must be within '_data' + annotations: + source: 'annotations/' # path to the directory of annotations for a collection within `_data` # wax search index settings lunr_index: @@ -122,8 +124,9 @@ lunr_index: ``` The above example includes a single collection `objects` that comprises: -1. a CSV `metadata:source` file (`objects.csv`), and -2. a `images:source` directory of image and pdf files. +1. a CSV `metadata:source` file (`objects.csv`), +2. an `images:source` directory of image and pdf files, and +3. an `annotations` directory for annotation source files. For more information on configuring Jekyll collections for __wax_tasks__, check out the [minicomp/wax wiki](https://minicomp.github.io/wiki/#/wax/) and . @@ -162,6 +165,26 @@ This task does *not* touch your source metadata or source image files! Instead, `$ bundle exec rake wax:clobber collection-name` +### wax:import:hocr + +Reads a given HOCR file and writes a simplified YAML file into ```_data/annotations/```. Takes four arguments: path to the HOCR file, collection, canvas, granularity. The ```canvas``` name is the basename of the corresponding image. Granularity may be one of word, line, or paragraph. (This import functionality is based on Ocracoke.) Imported files are named ```<collection>_<canvas>_ocr_<granularity>.yaml```. 
+ ### wax:annotations + +Renders source files in ```_data/annotations``` to AnnotationList json files in ```img/derivatives/iiif```. Takes a collection name as argument. The source files may be yaml (in the simplified format generated by ```wax:import:hocr```) or json. Json files should be in the normal Wax pre-Jekyll format, with yaml headers: + +``` +--- +layout: none +collection: <collection> +canvas: <canvas> +--- +``` + +### wax:updatemanifest + +Takes a collection name. Collects the ids of annotationlists associated with that collection and adds them to the appropriate canvases in the collection manifest using [```otherContent```](https://iiif.io/api/presentation/2.1/#canvas). + # Contributing Fork/clone the repository. After making code changes, run the tests (`$ bundle exec rubocop` and `$ bundle exec rspec`) before submitting a pull request. You can enable verbose tests with `$ DEBUG=true bundle exec rspec`. From 61fa6246735338bd1ab95153babc10014a2ce8f5 Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Thu, 26 Nov 2020 20:36:13 -0700 Subject: [PATCH 8/9] Bugfix: path to collection annotations --- lib/wax_tasks/site.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/wax_tasks/site.rb b/lib/wax_tasks/site.rb index f56f8eb..a02d33e 100644 --- a/lib/wax_tasks/site.rb +++ b/lib/wax_tasks/site.rb @@ -90,7 +90,7 @@ def generate_annotations(name) raise WaxTasks::Error::InvalidCollection if collection.nil? 
- output_dir = Utils.safe_join @config.source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif/annotation' + output_dir = Utils.safe_join @config.source, collection.iiif_derivative_source, 'annotation' records = collection.write_annotations output_dir collection.update_metadata records puts Rainbow("\nDone ✔").green From c0f3fcd205f12cadb050054d00c78155d5d08b85 Mon Sep 17 00:00:00 2001 From: Peter Binkley Date: Fri, 27 Nov 2020 15:59:18 -0700 Subject: [PATCH 9/9] Fix item-level identifiers; clean up --- lib/tasks/annotations.rake | 22 +++++--- lib/tasks/import/hocr.rb | 24 +++++---- lib/wax_tasks/annotationlist.rb | 14 ++--- lib/wax_tasks/collection/annotations.rb | 68 +++++++++++++------------ 4 files changed, 69 insertions(+), 59 deletions(-) diff --git a/lib/tasks/annotations.rake b/lib/tasks/annotations.rake index 08e1cd1..d920111 100644 --- a/lib/tasks/annotations.rake +++ b/lib/tasks/annotations.rake @@ -21,17 +21,23 @@ namespace :wax do raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:updatemanifest'").magenta if args.empty? 
site = WaxTasks::Site.new - config = WaxTasks.config_from_file - dir = 'img/derivatives/iiif/annotation' - - args.each do |collection_name| + args.each do |collection_name| collection = site.collections.find { |c| c.name == collection_name } annotationdata_source = collection.annotationdata_source - - collection.add_annotationlists_to_manifest( - Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort - ) + + # TODO: just crawl the item directories + files = Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort + annotationlists = {} + files.each do |file| + # path like _data/annotations/documents/doc9031/doc9031_1.yaml + filepath = Pathname.new(file) + pid = filepath.dirname.basename.to_s # doc9031 + annotationlists[pid] ||= [] + annotationlists[pid] << file + end + + collection.add_annotationlists_to_manifest(annotationlists) end end end diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb index fdaac62..552f4fa 100644 --- a/lib/tasks/import/hocr.rb +++ b/lib/tasks/import/hocr.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'addressable/template' require 'json' require 'nokogiri' @@ -6,10 +8,10 @@ # adapted from Okracoke: # https://github.com/NCSU-Libraries/ocracoke/blob/master/app/processing_helpers/hocr_open_annotation_creator.rb module WaxTasks +# class HocrOpenAnnotationCreator - def initialize(args) - @hocr = File.open(args[:hocr_path]){ |f| Nokogiri::XML(f) } + @hocr = File.open(args[:hocr_path]) { |f| Nokogiri::XML(f) } @collection = args[:collection] @identifier = args[:canvas] @granularity = args[:granularity] @@ -24,19 +26,19 @@ def initialize(args) @selector = get_selector end - def manifest_canvas_on_xywh(id, xywh) + def manifest_canvas_on_xywh(xywh) "#{@canvas_uri}#xywh=#{xywh}" end def get_selector - if @granularity == "word" - "ocrx_word" - elsif @granularity == "line" - "ocr_line" - elsif @granularity == "paragraph" - "ocr_par" + if @granularity == 'word' + 'ocrx_word' + elsif @granularity == 'line' + 
'ocr_line' + elsif @granularity == 'paragraph' + 'ocr_par' else - "" + '' end end @@ -106,7 +108,7 @@ def annotation_id(xywh) end def on_canvas(xywh) - manifest_canvas_on_xywh(@identifier, xywh) + manifest_canvas_on_xywh(xywh) end def to_json diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb index b5f4358..be2f801 100644 --- a/lib/wax_tasks/annotationlist.rb +++ b/lib/wax_tasks/annotationlist.rb @@ -9,17 +9,17 @@ def initialize(annotation_list) # input is in format of annotation list yaml @uri = annotation_list['uri'] @collection = annotation_list['collection'] - @canvas = annotation_list['canvas'] + @canvas = annotation_list['canvas'] @label = annotation_list['label'] @target = annotation_list['target'] @type = 'sc:AnnotationList' - @resources = annotation_list["resources"].map do |resource| + @resources = annotation_list['resources'].map do |resource| { - :@type => resource['type'] || 'cnt:ContentAsText', - chars: resource['chars'] || '', - format: resource['format'] || 'text/plain', - xywh: resource['xywh'] || '' + :@type => resource['type'] || 'cnt:ContentAsText', + chars: resource['chars'] || '', + format: resource['format'] || 'text/plain', + xywh: resource['xywh'] || '' # TODO: extend or subclass this as needed for other kinds of annotations } end @@ -28,7 +28,7 @@ def initialize(annotation_list) def to_json { :@context => 'http://iiif.io/api/presentation/2/context.json', - :@id => @uri, + :@id => @uri, :@type => @type, label: @label, resources: @resources.map do |resource| diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb index b9697c8..fcda46f 100644 --- a/lib/wax_tasks/collection/annotations.rb +++ b/lib/wax_tasks/collection/annotations.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + # module WaxTasks # @@ -22,7 +23,7 @@ def annotations_from_annotationdata Dir.glob(Utils.safe_join(@annotationdata_source, '*')).map do |path| item = WaxTasks::Item.new(path, {}) - item.record = 
records.find { |r| r.pid == item.pid } + item.record = records.find { |r| r.pid == item.pid } item.annotation_config = @config.dig 'annotations' warn Rainbow("\nCould not find record in #{@annotationdata_source} for image item #{path}.\n").orange if item.record.nil? item @@ -70,37 +71,40 @@ def write_annotations(dir) # def add_annotationlists_to_manifest(annotationlists) dir = 'img/derivatives/iiif/annotation' - collection_dir_name = File.basename(@annotationdata_source) - - manifest_path = Utils.safe_join File.dirname(dir), collection_dir_name, 'manifest.json' - manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] - manifest = JSON.parse(manifest_body) - - annotationlists.each do |list_path| - source_type = get_source_type list_path - - list = nil - canvas_id = nil - - case source_type - when '.yaml' - list = SafeYAML.load_file(list_path) - canvas_id = "#{list['collection']}_#{list['canvas']}" - when '.json' - # TODO: encapsulate this yaml/json handling in a class - list_front_matter, list_body = File.read(list_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] - list_yaml = YAML.load list_front_matter - list = JSON.parse(list_body) - canvas_id = "#{list_yaml['collection']}_#{list_yaml['canvas']}" + + annotationlists.each_key do |pid| + manifest_path = Utils.safe_join File.dirname(dir), pid, 'manifest.json' + manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(manifest_body) + + annotationlists[pid].each do |list_path| + source_type = get_source_type list_path + + list = nil + canvas_id = nil + + case source_type + # TODO: handle '.yml' + when '.yaml' + list = SafeYAML.load_file(list_path) + canvas_id = list['target'] + when '.json' + # TODO: encapsulate this yaml/json handling in a class + list_front_matter, list_body = File.read(list_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + list_yaml = YAML.safe_load(list_front_matter) + list = 
JSON.parse(list_body) + # TODO: confirm this has correct canvas_id + canvas_id = list_yaml['target'] + end + + add_annotationlist_to_manifest(manifest, list, canvas_id) end - add_annotationlist_to_manifest(manifest, list, canvas_id) - end - # TODO : save only if changed - File.open(manifest_path, 'w') do |f| - f.write("#{manifest_front_matter}#{manifest.to_json}") + # TODO : save only if changed + File.open(manifest_path, 'w') do |f| + f.write("#{manifest_front_matter}#{manifest.to_json}") + end end - end # @@ -109,14 +113,12 @@ def add_annotationlist_to_manifest(manifest, annotationlist, canvas_id) # dir: img/derivatives/iiif/annotation # annotationlist: # annotationlist_uri: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json - dir = 'img/derivatives/iiif/annotation' - annotationlist_uri = annotationlist['uri'] + annotationlist_uri = annotationlist['uri'] annotationlist_uri ||= annotationlist['@id'] # TODO: deal with multiple sequences, possibly containing same canvas (?) this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| - canvas['@id'] == - "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/#{canvas_id}.json" + canvas['@id'] == canvas_id end this_canvas['otherContent'] ||= []