diff --git a/Gemfile b/Gemfile index bf636a6..da5890c 100644 --- a/Gemfile +++ b/Gemfile @@ -3,9 +3,13 @@ source 'https://rubygems.org' gemspec +gem 'addressable' + # dev/test utilities gem 'bundle-audit', require: false +gem 'byebug', require: false gem 'diane', require: false +gem 'nokogiri', require: false gem 'rubocop', require: false gem 'simplecov', '0.17.1', require: false gem 'yard', require: false diff --git a/README.md b/README.md index 64af620..314d475 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,9 @@ collections: metadata: source: 'objects.csv' # path to the metadata file, must be within '_data' images: - source 'source_images/objects' # path to the directory of source images, must be within '_data' + source: 'source_images/objects' # path to the directory of source images, must be within '_data' + annotations: + source: 'annotations/' # path to the directory of annotations for a collection within `_data` # wax search index settings lunr_index: @@ -122,8 +124,9 @@ lunr_index: ``` The above example includes a single collection `objects` that comprises: -1. a CSV `metadata:source` file (`objects.csv`), and -2. a `images:source` directory of image and pdf files. +1. a CSV `metadata:source` file (`objects.csv`), +2. an `images:source` directory of image and pdf files, and +3. an `annotations:source` directory for annotation source files. For more information on configuring Jekyll collections for __wax_tasks__, check out the [minicomp/wax wiki](https://minicomp.github.io/wiki/#/wax/) and . @@ -162,6 +165,26 @@ This task does *not* touch your source metadata or source image files! Instead, `$ bundle exec rake wax:clobber collection-name` +### wax:import:hocr + +Reads a given HOCR file and writes a simplified YAML file into ```_data/annotations/```. Takes four arguments: path to the HOCR file, collection, canvas, granularity. The ```canvas``` name is the basename of the corresponding image. Granularity may be one of word, line, or paragraph. 
(This import functionality is based on Ocracoke.) Imported files are named ```{collection}_{canvas}_ocr_{granularity}.yaml```. + +### wax:annotations + +Renders source files in ```_data/annotations``` to AnnotationList json files in ```img/derivatives/iiif/annotation```. Takes a collection name as argument. The source files may be yaml (in the simplified format generated by ```wax:import:hocr```) or json. Json files should be in the normal Wax pre-Jekyll format, with yaml headers: + +``` +--- +layout: none +collection: +canvas: +--- +``` + +### wax:updatemanifest + +Takes a collection name. Collects the ids of annotationlists associated with that collection and adds them to the appropriate canvases in the collection manifest using [```otherContent```](https://iiif.io/api/presentation/2.1/#canvas). + # Contributing Fork/clone the repository. After making code changes, run the tests (`$ bundle exec rubocop` and `$ bundle exec rspec`) before submitting a pull request. You can enable verbose tests with `$ DEBUG=true bundle exec rspec`. diff --git a/lib/tasks/annotations.rake b/lib/tasks/annotations.rake new file mode 100644 index 0000000..d920111 --- /dev/null +++ b/lib/tasks/annotations.rake @@ -0,0 +1,43 @@ +# frozen_string_literal: true + +require 'wax_tasks' + +namespace :wax do + desc 'generate annotationlists from local yaml/json files' + task :annotations do + args = ARGV.drop(1).each { |a| task a.to_sym } + args.reject! { |a| a.start_with? '-' } + + raise WaxTasks::Error::MissingArguments, Rainbow("You must specify a collection after 'wax:annotations'").magenta if args.empty? 
+ + site = WaxTasks::Site.new + + args.each do |collection_name| + collection = site.collections.find { |c| c.name == collection_name } + annotationdata_source = collection.annotationdata_source + + # TODO: just crawl the item directories + files = Dir.glob("#{annotationdata_source}/**/*.{yaml,yml,json}").sort + annotationlists = {} + files.each do |file| + # path like _data/annotations/documents/doc9031/doc9031_1.yaml + filepath = Pathname.new(file) + pid = filepath.dirname.basename.to_s # doc9031 + annotationlists[pid] ||= [] + annotationlists[pid] << file + end + + collection.add_annotationlists_to_manifest(annotationlists) + end + end +end diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake new file mode 100644 index 0000000..5792816 --- /dev/null +++ b/lib/tasks/import.rake @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require_relative './import/hocr.rb' + +namespace :wax do + namespace :import do + task :hocr, [:hocr_path, :collection, :canvas, :granularity] do |_t, args| + desc 'generate canvas-level annotationlist yaml file from hocr file' + + # TODO: validate args + + hocr_annotations = WaxTasks::HocrOpenAnnotationCreator.new(args) + hocr_annotations.save + + puts 'done' + end + end +end diff --git a/lib/tasks/import/hocr.rb b/lib/tasks/import/hocr.rb new file mode 100644 index 0000000..552f4fa --- /dev/null +++ b/lib/tasks/import/hocr.rb @@ -0,0 +1,150 @@ +# frozen_string_literal: true + +require 'addressable/template' +require 'json' +require 'nokogiri' +require 'yaml' + +# adapted from Okracoke: +# https://github.com/NCSU-Libraries/ocracoke/blob/master/app/processing_helpers/hocr_open_annotation_creator.rb +module WaxTasks +# +class HocrOpenAnnotationCreator + def initialize(args) + @hocr = File.open(args[:hocr_path]) { |f| Nokogiri::XML(f) } + @collection = args[:collection] + @identifier = args[:canvas] + @granularity = args[:granularity] + + @uri_root = "{{ '/' | absolute_url }}\img/derivatives/iiif" + @canvas_root = 
"#{@collection}_#{@identifier}" + @label = "#{@canvas_root}_ocr_#{@granularity}" + + @canvas_uri = "#{@uri_root}/canvas/#{@canvas_root}.json" + @list_uri = "#{@uri_root}/annotation/#{@label}.json" + + @selector = get_selector + end + + def manifest_canvas_on_xywh(xywh) + "#{@canvas_uri}#xywh=#{xywh}" + end + + def get_selector + if @granularity == 'word' + 'ocrx_word' + elsif @granularity == 'line' + 'ocr_line' + elsif @granularity == 'paragraph' + 'ocr_par' + else + '' + end + end + + def resources + @hocr.xpath(".//*[contains(@class, '#{@selector}')]").map do |chunk| + text = chunk.text().gsub("\n", ' ').squeeze(' ').strip + if !text.empty? + title = chunk['title'] + title_parts = title.split('; ') + xywh = '0,0,0,0' + title_parts.each do |title_part| + if title_part.include?('bbox') + match_data = /bbox\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/.match title_part + x = match_data[1].to_i + y = match_data[2].to_i + x1 = match_data[3].to_i + y1 = match_data[4].to_i + w = x1 - x + h = y1 - y + xywh = "#{x},#{y},#{w},#{h}" + end + end + annotation(text, xywh) + end + end.compact + end + + def annotation_list + { + :"@context" => "http://iiif.io/api/presentation/2/context.json", + :"@id" => annotation_list_id, + :"@type" => "sc:AnnotationList", + :"@label" => "OCR text with granularity of #{@granularity}", + resources: resources + } + end + + def annotation_list_id_base + "{{ '/' | absolute_url }}\ +img/derivatives/iiif/canvas/\ +#{@collection}/#{@identifier}-annotation-list-#{@granularity}.json" + + #File.join OKRACOKE_BASE_URL, @identifier + '-annotation-list-' + @granularity + end + + def annotation_list_id + annotation_list_id_base + '.json' + end + + def annotation(chars, xywh) + { + :"@id" => annotation_id(xywh), + :"@type" => "oa:Annotation", + motivation: "sc:painting", + resource: { + :"@type" => "cnt:ContentAsText", + format: "text/plain", + chars: chars + }, + # TODO: use canvas_url_template + on: on_canvas(xywh) + } + end + + def annotation_id(xywh) + File.join 
annotation_list_id_base, xywh + end + + def on_canvas(xywh) + manifest_canvas_on_xywh(xywh) + end + + def to_json + annotation_list.to_json + end + + def id + @identifier + end + + def to_yaml + yaml_list = { + 'uri' => @list_uri, + 'collection' => @collection, + 'canvas' => @identifier, + 'label' => @label, + 'target' => @canvas_uri, + 'resources' => [] + } + annotation_list[:resources].each do |resource| + yaml_list['resources'] << { + 'xywh' => resource[:@id].sub(/.*\/(.*)/, '\1'), + 'chars' => resource[:resource][:chars] + } + end + yaml_list.to_yaml + end + + def save + FileUtils.mkdir_p("./_data/annotations/#{@collection}/#{@collection}") + # TODO: handle item as distinct from collection + # TODO: do not overwrite existing file without asking + File.open("./_data/annotations/#{@collection}/#{@collection}/#{@collection}_#{@identifier}_ocr_#{@granularity}.yaml", 'w') do |file| + file.write(to_yaml) + end + end + +end +end diff --git a/lib/wax_tasks.rb b/lib/wax_tasks.rb index 0970828..427b470 100644 --- a/lib/wax_tasks.rb +++ b/lib/wax_tasks.rb @@ -13,6 +13,8 @@ require 'safe_yaml' # relative +require_relative 'wax_tasks/annotation' +require_relative 'wax_tasks/annotationlist' require_relative 'wax_tasks/asset' require_relative 'wax_tasks/collection' require_relative 'wax_tasks/config' diff --git a/lib/wax_tasks/annotation.rb b/lib/wax_tasks/annotation.rb new file mode 100644 index 0000000..3c8e3cd --- /dev/null +++ b/lib/wax_tasks/annotation.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +# +class Annotation + def initialize(annotationlist_id, canvas_id, xywh, + resource = {}, options = {}) + @annotationlist_id = annotationlist_id + @canvas_id = canvas_id + @xywh = xywh # TODO: validate xywh + @type = 'oa:Annotation' + @motivation = options[:motivation] || 'sc:painting' + @resource = + { + :@type => resource[:type] || 'cnt:ContentAsText', + chars: resource[:chars] || '', + format: resource[:format] || 'text/plain' + # TODO: extend or subclass this as 
needed for other kinds of annotations + } + @resource[:language] = resource[:language] unless resource[:language].nil? + end + + def to_hash + { + :@context => 'http://iiif.io/api/presentation/2/context.json', + :@id => @annotationlist_id + '#' + @xywh, + :@type => @type, + motivation: @motivation, + resource: @resource, + on: @canvas_id + '#' + @xywh + } + end +end diff --git a/lib/wax_tasks/annotationlist.rb b/lib/wax_tasks/annotationlist.rb new file mode 100644 index 0000000..be2f801 --- /dev/null +++ b/lib/wax_tasks/annotationlist.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module WaxTasks + # + class AnnotationList + attr_reader :canvas, :label + + def initialize(annotation_list) + # input is in format of annotation list yaml + @uri = annotation_list['uri'] + @collection = annotation_list['collection'] + @canvas = annotation_list['canvas'] + @label = annotation_list['label'] + @target = annotation_list['target'] + + @type = 'sc:AnnotationList' + @resources = annotation_list['resources'].map do |resource| + { + :@type => resource['type'] || 'cnt:ContentAsText', + chars: resource['chars'] || '', + format: resource['format'] || 'text/plain', + xywh: resource['xywh'] || '' + # TODO: extend or subclass this as needed for other kinds of annotations + } + end + end + + def to_json + { + :@context => 'http://iiif.io/api/presentation/2/context.json', + :@id => @uri, + :@type => @type, + label: @label, + resources: @resources.map do |resource| + { + :@type => 'oa:Annotation', + motivation: 'sc:painting', + resource: { + :@type => resource[:@type], + format: resource[:format], + chars: resource[:chars] + }, + on: "#{@target}#xywh=#{resource[:xywh]}" + } + end + }.to_json + end + + def save + path = "#{dir}/#{Utils.slug(@pid)}.md" + if File.exist? 
path + 0 + else + FileUtils.mkdir_p File.dirname(path) + File.open(path, 'w') { |f| f.puts "#{@hash.to_yaml}---" } + 1 + end + end + end +end diff --git a/lib/wax_tasks/collection.rb b/lib/wax_tasks/collection.rb index e672c9a..819269e 100644 --- a/lib/wax_tasks/collection.rb +++ b/lib/wax_tasks/collection.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require_relative 'collection/annotations' require_relative 'collection/images' require_relative 'collection/metadata' @@ -8,10 +9,12 @@ module WaxTasks class Collection attr_reader :name, :config, :ext, :search_fields, :page_source, :metadata_source, :imagedata_source, - :iiif_derivative_source, :simple_derivative_source + :iiif_derivative_source, :simple_derivative_source, + :annotationdata_source include Collection::Metadata include Collection::Images + include Collection::Annotations IMAGE_DERIVATIVE_DIRECTORY = 'img/derivatives' DEFAULT_VARIANTS = { 'thumbnail' => 250, 'fullwidth' => 1140 }.freeze @@ -27,6 +30,7 @@ def initialize(name, config, source, collections_dir, ext) @metadata_source = Utils.safe_join source, '_data', config.dig('metadata', 'source') @imagedata_source = Utils.safe_join source, '_data', config.dig('images', 'source') @iiif_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'iiif' + @annotationdata_source = Utils.safe_join source, '_data', config.dig('annotations', 'source') @simple_derivative_source = Utils.safe_join source, IMAGE_DERIVATIVE_DIRECTORY, 'simple' @search_fields = %w[pid label thumbnail permalink collection] @image_variants = image_variants diff --git a/lib/wax_tasks/collection/annotations.rb b/lib/wax_tasks/collection/annotations.rb new file mode 100644 index 0000000..fcda46f --- /dev/null +++ b/lib/wax_tasks/collection/annotations.rb @@ -0,0 +1,139 @@ +# frozen_string_literal: true + +# +module WaxTasks + # + class Collection + # + module Annotations + # + # + def get_source_type(source_path) + source_type = File.extname source_path # '.yaml' or 
'.json' + source_type = '.yaml' if source_type == '.yml' + source_type + end + + # + # + def annotations_from_annotationdata + raise Error::MissingSource, "Cannot find annotation data source '#{@annotationdata_source}'" unless Dir.exist? @annotationdata_source + + records = records_from_metadata + + Dir.glob(Utils.safe_join(@annotationdata_source, '*')).map do |path| + item = WaxTasks::Item.new(path, {}) + item.record = records.find { |r| r.pid == item.pid } + item.annotation_config = @config.dig 'annotations' + warn Rainbow("\nCould not find record in #{@annotationdata_source} for image item #{path}.\n").orange if item.record.nil? + item + end.compact + end + + # + # + def write_annotations(dir) + puts Rainbow("Generating annotations for collection '#{@name}'").cyan + bar = ProgressBar.new(annotations_from_annotationdata.length) + bar.write + annotations_from_annotationdata.map do |item| + item.annotations.each do |source_path| + dest_path = "#{Utils.safe_join dir, File.basename(source_path, '.*')}.json" + # img/derivatives/iiif/annotation/test_collection_0_ocr_paragraph.json + FileUtils.mkdir_p File.dirname(dest_path) + next if File.exist? dest_path + + source_type = get_source_type source_path + case source_type + when '.yaml' + # load yaml, write json + annotationlist = WaxTasks::AnnotationList.new(SafeYAML.load_file(source_path)) + File.write(dest_path, "---\nlayout: none\n---\n#{annotationlist.to_json}\n") + + # add_annotationlist_to_manifest(annotationlist, dest_path) + when '.json' + # TODO: handle json input - we assume it has uris in final jekyll-ready form + # e.g. {{ '/' | absolute_url }}img/derivatives/iiif/annotation/recipebook_002_clippings.json + + FileUtils.cp source_path, dest_path + end + end + + # TODO: do we want to update the item-level csv? + + bar.increment! 
+ bar.write + item + end.flat_map(&:record).compact + end + + # + # + def add_annotationlists_to_manifest(annotationlists) + dir = 'img/derivatives/iiif/annotation' + + annotationlists.each_key do |pid| + manifest_path = Utils.safe_join File.dirname(dir), pid, 'manifest.json' + manifest_front_matter, manifest_body = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(manifest_body) + + annotationlists[pid].each do |list_path| + source_type = get_source_type list_path + + list = nil + canvas_id = nil + + case source_type + # TODO: handle '.yml' + when '.yaml' + list = SafeYAML.load_file(list_path) + canvas_id = list['target'] + when '.json' + # TODO: encapsulate this yaml/json handling in a class + list_front_matter, list_body = File.read(list_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + list_yaml = YAML.safe_load(list_front_matter) + list = JSON.parse(list_body) + # TODO: confirm this has correct canvas_id + canvas_id = list_yaml['target'] + end + + add_annotationlist_to_manifest(manifest, list, canvas_id) + end + + # TODO : save only if changed + File.open(manifest_path, 'w') do |f| + f.write("#{manifest_front_matter}#{manifest.to_json}") + end + end + end + + # + # + def add_annotationlist_to_manifest(manifest, annotationlist, canvas_id) + # dir: img/derivatives/iiif/annotation + # annotationlist: + # annotationlist_uri: img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json + annotationlist_uri = annotationlist['uri'] + annotationlist_uri ||= annotationlist['@id'] + + # TODO: deal with multiple sequences, possibly containing same canvas (?) 
+ this_canvas = manifest['sequences'][0]['canvases'].find do |canvas| + canvas['@id'] == canvas_id + end + + this_canvas['otherContent'] ||= [] + + # TODO: remove entries for annotationlists that have been deleted + + if this_canvas['otherContent'].find { |c| c['@id'] == annotationlist_uri } + puts "AnnotationList #{canvas_id} already linked in Manifest" + else + this_canvas['otherContent'] << { + '@id' => annotationlist_uri, + '@type' => 'sc:AnnotationList' + } + end + end + end + end +end diff --git a/lib/wax_tasks/item.rb b/lib/wax_tasks/item.rb index 7b79e06..4275eca 100644 --- a/lib/wax_tasks/item.rb +++ b/lib/wax_tasks/item.rb @@ -3,7 +3,7 @@ module WaxTasks # class Item - attr_accessor :record, :iiif_config + attr_accessor :record, :iiif_config, :annotation_config attr_reader :pid # @@ -60,6 +60,13 @@ def simple_derivatives @assets.map(&:simple_derivatives).flatten end + # + # + def annotations + # TODO: integrate this with assets handling? + Dir.glob("#{@path}/*").sort + end + # # def logo diff --git a/lib/wax_tasks/site.rb b/lib/wax_tasks/site.rb index 7c5852b..a02d33e 100644 --- a/lib/wax_tasks/site.rb +++ b/lib/wax_tasks/site.rb @@ -82,5 +82,18 @@ def generate_derivatives(name, type) collection.update_metadata records puts Rainbow("\nDone ✔").green end + + # + # + def generate_annotations(name) + collection = @config.find_collection name + + raise WaxTasks::Error::InvalidCollection if collection.nil? + + output_dir = Utils.safe_join @config.source, collection.iiif_derivative_source, 'annotation' + records = collection.write_annotations output_dir + collection.update_metadata records + puts Rainbow("\nDone ✔").green + end end end diff --git a/spec/sample_hocr/img_item_1.hocr b/spec/sample_hocr/img_item_1.hocr new file mode 100644 index 0000000..d8d244f --- /dev/null +++ b/spec/sample_hocr/img_item_1.hocr @@ -0,0 +1,23 @@ + + + + + + + + + + +
+
+

+ If the ax + + falls the + +

+
+
+ + diff --git a/spec/sample_hocr/manifest.json b/spec/sample_hocr/manifest.json new file mode 100644 index 0000000..4363a0d --- /dev/null +++ b/spec/sample_hocr/manifest.json @@ -0,0 +1,49 @@ +--- +layout: none +--- +{ + "@context": "http://iiif.io/api/presentation/2/context.json", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/test_collection/manifest.json", + "@type": "sc:Manifest", + "label": "test_collection", + "thumbnail": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/250,/0/default.jpg", + "viewingDirection": "left-to-right", + "viewingHint": "individuals", + "sequences": [ + { + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/sequence/test_collection_img_item_1.json", + "@type": "sc:Sequence", + "canvases": [ + { + "@type": "sc:Canvas", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json", + "label": "Page 1", + "width": 2600, + "height": 1697, + "thumbnail": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/250,/0/default.jpg", + "images": [ + { + "@type": "oa:Annotation", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1.json", + "motivation": "sc:painting", + "resource": { + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/full/0/default.jpg", + "@type": "dcterms:Image", + "format": "image/jpeg", + "service": { + "@context": "http://iiif.io/api/image/2/context.json", + "@id": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1", + "profile": "http://iiif.io/api/image/2/level0.json" + }, + "width": 2600, + "height": 1697 + }, + "on": "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" + } + ] + } + ] + } + ], + "full": "{{ '/' | absolute_url }}img/derivatives/iiif/images/test_collection_img_item_1/full/full/0/default.jpg" +} \ No newline at end of file diff --git 
a/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml new file mode 100644 index 0000000..a0f4872 --- /dev/null +++ b/spec/sample_hocr/test_collection_img_item_1_ocr_paragraph.yaml @@ -0,0 +1,9 @@ +--- +uri: "{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" +collection: test_collection +canvas: 'img_item_1' +label: test_collection_img_item_1_ocr_paragraph +target: "{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json" +resources: +- xywh: '20,668,171,100' + chars: If the ax falls the diff --git a/spec/sample_site/_config.yml b/spec/sample_site/_config.yml index 89f3bba..247f73f 100644 --- a/spec/sample_site/_config.yml +++ b/spec/sample_site/_config.yml @@ -10,6 +10,8 @@ collections: source: valid.csv images: source: 'images/test_collection' + annotations: + source: 'annotations/test_collection' json_collection: layout: default.html metadata: diff --git a/spec/wax_tasks/annotations_spec.rb b/spec/wax_tasks/annotations_spec.rb new file mode 100644 index 0000000..9f4b91e --- /dev/null +++ b/spec/wax_tasks/annotations_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +describe WaxTasks::AnnotationList do + include_context 'shared' + + before(:all) do + Test.reset + end + + # + # =================================================== + # ANNOTATION.UPDATEMANIFEST + # =================================================== + # + describe '#updatemanifest' do + context 'updates manifest' do + it 'updates manifest' do + FileUtils.mkdir_p "#{BUILD}/img/derivatives/iiif/test_collection/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/manifest.json"), "#{BUILD}/img/derivatives/iiif/test_collection/" + FileUtils.mkdir_p "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + + config = 
WaxTasks::Config.new(config || WaxTasks.config_from_file) + collection = config.find_collection 'csv_collection' + + collection.add_annotationlists_to_manifest( + Dir.glob("#{BUILD}/_data/annotations/test_collection/dir_imgs_item/*.{yaml,yml,json}").sort + ) + + manifest_path = "#{BUILD}/img/derivatives/iiif/test_collection/manifest.json" + raw_yaml, raw_json = File.read(manifest_path).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + manifest = JSON.parse(raw_json) + + expect(manifest['sequences'][0]['canvases'][0]['otherContent']).not_to be_nil + expect(manifest['sequences'][0]['canvases'][0]['otherContent'][0]['@id']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json") + end + end + end +end diff --git a/spec/wax_tasks/hocr_spec.rb b/spec/wax_tasks/hocr_spec.rb new file mode 100644 index 0000000..fa4d2c3 --- /dev/null +++ b/spec/wax_tasks/hocr_spec.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +require_relative '../../lib/tasks/import/hocr.rb' + +describe WaxTasks::HocrOpenAnnotationCreator do + include_context 'shared' + + before(:all) do + Test.reset + end + + # + # =================================================== + # HocrOpenAnnotationCreator.NEW + # =================================================== + # + describe '#new' do + include_context 'shared' + + context 'parses hocr file not to raise error' do + it 'runs without errors' do + expect { WaxTasks::HocrOpenAnnotationCreator.new({ + hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", + collection: 'test_collection', + canvas: 'img_item_1', + granularity: 'paragraph' + }) }.not_to raise_error + end + end + end + + describe '#save' do + include_context 'shared' + + context 'hocr yaml file' do + hocr = WaxTasks::HocrOpenAnnotationCreator.new({ + hocr_path: "#{ROOT}/spec/sample_hocr/img_item_1.hocr", + collection: 'test_collection', + canvas: 'img_item_1', + granularity: 'paragraph' + }) + + it 'captures chars correctly' do + 
expect(hocr.resources.first[:resource][:chars]).to eq('If the ax falls the') + end + + it 'captures target correctly' do + expect(hocr.resources.first[:on]).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") + end + end + end + +end diff --git a/spec/wax_tasks/site_spec.rb b/spec/wax_tasks/site_spec.rb index da4f427..a6066e4 100644 --- a/spec/wax_tasks/site_spec.rb +++ b/spec/wax_tasks/site_spec.rb @@ -272,4 +272,39 @@ end end end + + # + # =================================================== + # SITE.GENERATE_ANNOTATIONLISTS (NAME) + # =================================================== + # + + describe '#generate_annotationlists' do + before(:example) do + FileUtils.mkdir_p "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + FileUtils.cp Dir.glob("#{ROOT}/spec/sample_hocr/*.yaml"), "#{BUILD}/_data/annotations/test_collection/dir_imgs_item/" + end + + # TODO: mock or stub the annotation and manifest files, break up this block + context 'when generates sample annotationlist' do + it 'runs without error' do + expect { site_from_config_file.generate_annotations('csv_collection') }.not_to raise_error + end + + it 'generates annotationlist' do + json_file = "#{BUILD}/img/derivatives/iiif/annotation/test_collection_img_item_1_ocr_paragraph.json" + expect(File).to exist(json_file) + raw_yaml, raw_json = File.read(json_file).match(/(---\n.+?\n---\n)(.*)/m)[1..2] + annotation = JSON.parse(raw_json)['resources'].first + expect(annotation['on']).to eq("{{ '/' | absolute_url }}img/derivatives/iiif/canvas/test_collection_img_item_1.json#xywh=20,668,171,100") + expect(annotation['resource']['chars']).to eq('If the ax falls the') + end + end + + after(:example) do + FileUtils.rm Dir.glob("#{BUILD}/_data/annotations/test_collection/dir_imgs_item/*.yaml") + FileUtils.rm Dir.glob("#{BUILD}/img/derivatives/iiif/test_collection/manifest.json") + end + end + end