From fb841a7ad218de88595f847adaf0c596c29d7d08 Mon Sep 17 00:00:00 2001 From: kspurgin Date: Thu, 11 Feb 2021 19:55:48 -0500 Subject: [PATCH] More warnings, fewer mapper failures (#96) * dry up `DataHandler.prep` method to use `DataPrepper.prep` * Warn instead of failing when... - a Client term search returns more than one record matching the term. - when incorrect delimiting in repeating field subgroups causes overflow (more subgroups than there are parent groups) Also warn when subgroup values are uneven * bump version --- lib/collectionspace/mapper/data_handler.rb | 6 +-- lib/collectionspace/mapper/data_mapper.rb | 38 +++++++++++++ lib/collectionspace/mapper/data_prepper.rb | 4 ++ lib/collectionspace/mapper/term_handler.rb | 54 ++++++++++++++----- lib/collectionspace/mapper/version.rb | 2 +- .../mapper/data_mapper_spec.rb | 49 ++++++++++++++++- .../datahashes/core/collectionobject2.json | 9 ++++ .../files/xml/core/collectionobject2.xml | 2 + 8 files changed, 144 insertions(+), 20 deletions(-) create mode 100644 spec/fixtures/files/datahashes/core/collectionobject2.json create mode 100644 spec/fixtures/files/xml/core/collectionobject2.xml diff --git a/lib/collectionspace/mapper/data_handler.rb b/lib/collectionspace/mapper/data_handler.rb index 2c10f2b1..691a6ba4 100644 --- a/lib/collectionspace/mapper/data_handler.rb +++ b/lib/collectionspace/mapper/data_handler.rb @@ -54,11 +54,7 @@ def prep(data) response = CollectionSpace::Mapper::setup_data(data) if response.valid? prepper = CollectionSpace::Mapper::DataPrepper.new(response, self) - prepper.split_data - prepper.transform_data - prepper.check_data - prepper.combine_data_fields - prepper.response + prepper.prep else response end diff --git a/lib/collectionspace/mapper/data_mapper.rb b/lib/collectionspace/mapper/data_mapper.rb index 35883854..d093137c 100644 --- a/lib/collectionspace/mapper/data_mapper.rb +++ b/lib/collectionspace/mapper/data_mapper.rb @@ -128,7 +128,37 @@ def map_group(xpath, xphash, targetnode, thisdata) end end + def even_subgroup_field_values?(data) + data.values.map(&:flatten).map(&:length).uniq.length == 1 ? true : false + end + + def add_uneven_subgroup_warning(parent_path:, intervening_path:, subgroup:) + response.warnings << { + category: :uneven_subgroup_field_values, + field: nil, + type: nil, + subtype: nil, + value: nil, + message: "Fields in subgroup #{parent_path}/#{intervening_path.join('/')}/#{subgroup} have different numbers of values" + } + end + + def add_too_many_subgroups_warning(parent_path:, intervening_path:, subgroup:) + response.warnings << { + category: :subgroup_contains_data_for_nonexistent_groups, + field: nil, + type: nil, + subtype: nil, + value: nil, + message: "Data for subgroup #{intervening_path.join('/')}/#{subgroup} is trying to map to more instances of parent group #{parent_path} than exist. Overflow subgroup values will be skipped. The usual cause of this is that you separated subgroup values that belong inside the same parent group with the repeating field delimiter (#{handler.config[:delimiter]}) instead of the subgroup delimiter (#{handler.config[:subgroup_delimiter]})" + } + end + def group_accommodates_subgroup?(groupdata, subgroupdata) + sg_max_length = subgroupdata.values.map(&:flatten).map(&:length).max + sg_max_length <= groupdata.length ? true : false + end + def map_subgroup(xpath, xphash, targetnode, thisdata) parent_path = xphash[:parent] parent_set = @doc.xpath("//#{parent_path}") @@ -143,8 +173,16 @@ def map_subgroup(xpath, xphash, targetnode, thisdata) groups[i] = { parent: p, data: {} } end + add_uneven_subgroup_warning(parent_path: parent_path, + intervening_path: subgroup_path, + subgroup: subgroup) unless even_subgroup_field_values?(thisdata) + add_too_many_subgroups_warning(parent_path: parent_path, + intervening_path: subgroup_path, + subgroup: subgroup) unless group_accommodates_subgroup?(groups, thisdata) + thisdata.each do |f, v| v.each_with_index do |val, i| + next if groups[i].nil? groups[i][:data][f] = val end end diff --git a/lib/collectionspace/mapper/data_prepper.rb b/lib/collectionspace/mapper/data_prepper.rb index c674b6d5..be39b4b5 100644 --- a/lib/collectionspace/mapper/data_prepper.rb +++ b/lib/collectionspace/mapper/data_prepper.rb @@ -45,6 +45,8 @@ def transform_date_fields def handle_term_fields @xphash.each{ |xpath, hash| do_term_handling(xpath, hash) } + @response.warnings.flatten! + @response.errors.flatten! @response.transformed_data end @@ -178,6 +180,8 @@ def do_term_handling(xpath, xphash) config: @config) @response.transformed_data[column] = th.result @response.terms << th.terms + @response.warnings << th.warnings unless th.warnings.empty? + @response.errors << th.errors unless th.errors.empty? end end diff --git a/lib/collectionspace/mapper/term_handler.rb b/lib/collectionspace/mapper/term_handler.rb index 9bbd0521..ec735504 100644 --- a/lib/collectionspace/mapper/term_handler.rb +++ b/lib/collectionspace/mapper/term_handler.rb @@ -3,7 +3,9 @@ module CollectionSpace module Mapper class TermHandler - attr_reader :result, :terms + attr_reader :result, :terms, :warnings, :errors, + :column, :source_type, :type, :subtype + attr_accessor :value def initialize(mapping:, data:, client:, cache:, config:) @mapping = mapping @column = mapping[:datacolumn] @@ -23,6 +25,8 @@ def initialize(mapping:, data:, client:, cache:, config:) @type = 'vocabularies' @subtype = @mapping[:transforms][:vocabulary] end + @warnings = [] + @errors = [] handle_terms end @@ -37,12 +41,14 @@ def handle_terms end def handle_term(val) + @value = val + return '' if val.blank? added = false term_report = { - category: @source_type, - field: @column + category: source_type, + field: column } if in_cache?(val) @@ -63,44 +69,66 @@ def handle_term(val) unless added refname_obj = CollectionSpace::Mapper::Tools::RefName.new( - source_type: @source_type, - type: @type, - subtype: @subtype, + source_type: source_type, + type: type, + subtype: subtype, term: val, cache: @cache) @terms << term_report.merge({ found: false, refname: refname_obj }) - @cache.put(@type, @subtype, val, refname_obj.urn) + @cache.put(type, subtype, val, refname_obj.urn) refname_urn = refname_obj.urn end refname_urn end def in_cache?(val) - @cache.exists?(@type, @subtype, val) + @cache.exists?(type, subtype, val) end def cached_term(val) - @cache.get(@type, @subtype, val, search: false) + @cache.get(type, subtype, val, search: false) end def searched_term(val) begin response = @client.find( - type: @type, - subtype: @subtype, + type: type, + subtype: subtype, value: val, field: search_field ) rescue StandardError => e puts e.message else - response.parsed.dig('abstract_common_list', 'list_item', 'refName') + response_term_refname(response) end end + def response_term_refname(response) + term_ct = response.parsed.dig('abstract_common_list', 'totalItems') + return nil if term_ct.nil? + + if term_ct.to_i == 1 + refname = response.parsed.dig('abstract_common_list', 'list_item', 'refName') + elsif term_ct.to_i > 1 + rec = response.parsed.dig('abstract_common_list', 'list_item')[0] + using_uri = "#{@client.config.base_uri}#{rec['uri']}" + refname = rec['refName'] + warnings << { + category: :multiple_records_found_for_term, + field: column, + type: type, + subtype: subtype, + value: value, + message: "#{term_ct} records found. Using #{using_uri}" + } + end + refname + end + def search_field begin - field = CollectionSpace::Service.get(type: @type)[:term] + field = CollectionSpace::Service.get(type: type)[:term] rescue StandardError => e puts e.message else diff --git a/lib/collectionspace/mapper/version.rb b/lib/collectionspace/mapper/version.rb index 73adfce4..a6b0b153 100644 --- a/lib/collectionspace/mapper/version.rb +++ b/lib/collectionspace/mapper/version.rb @@ -1,5 +1,5 @@ module CollectionSpace module Mapper - VERSION = "2.1.4" + VERSION = "2.1.5" end end diff --git a/spec/collectionspace/mapper/data_mapper_spec.rb b/spec/collectionspace/mapper/data_mapper_spec.rb index 58005637..dd27fdfe 100644 --- a/spec/collectionspace/mapper/data_mapper_spec.rb +++ b/spec/collectionspace/mapper/data_mapper_spec.rb @@ -6,6 +6,52 @@ before(:all) do @config = CollectionSpace::Mapper::DEFAULT_CONFIG end + + context 'core profile' do + before(:all) do + @client = core_client + @cache = core_cache + populate_core(@cache) + end + context 'collectionobject record' do + before(:all) do + @collectionobject_mapper = get_json_record_mapper(path: 'spec/fixtures/files/mappers/release_6_1/core/core_6_1_0-collectionobject.json') + @handler = CollectionSpace::Mapper::DataHandler.new(record_mapper: @collectionobject_mapper, client: @client, cache: @cache, config: @config) + end + context 'overflow subgroup record with uneven subgroup values' do + before(:all) do + @datahash = get_datahash(path: 'spec/fixtures/files/datahashes/core/collectionobject2.json') + @prepper = CollectionSpace::Mapper::DataPrepper.new(@datahash, @handler) + @mapper = CollectionSpace::Mapper::DataMapper.new(@prepper.prep, @handler, @prepper.xphash) + @mapped_doc = remove_namespaces(@mapper.response.doc) + @mapped_xpaths = list_xpaths(@mapped_doc) + @fixture_doc = get_xml_fixture('core/collectionobject2.xml') + @fixture_xpaths = test_xpaths(@fixture_doc, @handler.mapper[:mappings]) + end + it 'mapper response includes overflow subgroup warning' do + w = @mapper.response.warnings.any?{ |w| w[:category] == :subgroup_contains_data_for_nonexistent_groups } + expect(w).to be true + end + it 'mapper response includes uneven subgroup values warning' do + w = @mapper.response.warnings.any?{ |w| w[:category] == :uneven_subgroup_field_values } + expect(w).to be true + end + it 'does not map unexpected fields' do + diff = @mapped_xpaths - @fixture_xpaths + expect(diff).to eq([]) + end + + it 'maps as expected' do + @fixture_xpaths.each do |xpath| + fixture_node = standardize_value(@fixture_doc.xpath(xpath).text) + mapped_node = standardize_value(@mapped_doc.xpath(xpath).text) + expect(mapped_node).to eq(fixture_node) + end + end + end + end + end + context 'lhmc profile' do before(:all) do @client = lhmc_client @@ -45,7 +91,8 @@ end end end - end + end + context 'botgarden profile' do before(:all) do diff --git a/spec/fixtures/files/datahashes/core/collectionobject2.json b/spec/fixtures/files/datahashes/core/collectionobject2.json new file mode 100644 index 00000000..4ec97406 --- /dev/null +++ b/spec/fixtures/files/datahashes/core/collectionobject2.json @@ -0,0 +1,9 @@ +{ + "objectNumber": "21CS.001.0002", + "measuredPart": "unframed", + "dimensionSummary": "8.5 x 11", + "dimension": "height| width", + "measuredByOrganization": "Admirable Corporation", + "value": "8.5| 11", + "measurementUnit": "inches" +} diff --git a/spec/fixtures/files/xml/core/collectionobject2.xml b/spec/fixtures/files/xml/core/collectionobject2.xml new file mode 100644 index 00000000..a6514d1f --- /dev/null +++ b/spec/fixtures/files/xml/core/collectionobject2.xml @@ -0,0 +1,2 @@ + +2021-02-12T00:31:59.813Zadmin@core.collectionspace.orgadmin@core.collectionspace.orgproject1urn:cspace:core.collectionspace.org:collectionobjects:id(98e3dcf4-2ae5-4c14-8e02)'21CS.001.0002'/collectionobjects/98e3dcf4-2ae5-4c14-8e022021-02-12T00:34:42.786Zfalsefalsefalse21CS.001.0002false8.5 x 11unframedurn:cspace:core.collectionspace.org:orgauthorities:name(organization):item:name(AdmirableCorporation1613089843473)'Admirable Corporation'height8.5inchesfalsefalseurn:cspace:indeterminate:locationauthorities:name(indeterminate):item:name(indeterminate)'~Indeterminate Location~'falsed884e854-a10b-49c8-87e7-e7a900ffdc52Administratoradmin@core.collectionspace.org1 \ No newline at end of file