Skip to content

Commit

Permalink
More warnings, fewer mapper failures (#96)
Browse files Browse the repository at this point in the history
* dry up `DataHandler.prep` method to use `DataPrepper.prep`

* Warn instead of failing when...
- a Client term search returns more than one record matching the term.
- when incorrect delimiting in repeating field subgroups causes
overflow (more subgroups than there are parent groups)
Also warn when subgroup values are uneven

* bump version
  • Loading branch information
kspurgin authored Feb 12, 2021
1 parent 7a91431 commit fb841a7
Show file tree
Hide file tree
Showing 8 changed files with 144 additions and 20 deletions.
6 changes: 1 addition & 5 deletions lib/collectionspace/mapper/data_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,7 @@ def prep(data)
response = CollectionSpace::Mapper::setup_data(data)
if response.valid?
prepper = CollectionSpace::Mapper::DataPrepper.new(response, self)
prepper.split_data
prepper.transform_data
prepper.check_data
prepper.combine_data_fields
prepper.response
prepper.prep
else
response
end
Expand Down
38 changes: 38 additions & 0 deletions lib/collectionspace/mapper/data_mapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,37 @@ def map_group(xpath, xphash, targetnode, thisdata)
end
end

def even_subgroup_field_values?(data)
data.values.map(&:flatten).map(&:length).uniq.length == 1 ? true : false
end

def add_uneven_subgroup_warning(parent_path:, intervening_path:, subgroup:)
response.warnings << {
category: :uneven_subgroup_field_values,
field: nil,
type: nil,
subtype: nil,
value: nil,
message: "Fields in subgroup #{parent_path}/#{intervening_path.join('/')}/#{subgroup} have different numbers of values"
}
end

def add_too_many_subgroups_warning(parent_path:, intervening_path:, subgroup:)
response.warnings << {
category: :subgroup_contains_data_for_nonexistent_groups,
field: nil,
type: nil,
subtype: nil,
value: nil,
message: "Data for subgroup #{intervening_path.join('/')}/#{subgroup} is trying to map to more instances of parent group #{parent_path} than exist. Overflow subgroup values will be skipped. The usual cause of this is that you separated subgroup values that belong inside the same parent group with the repeating field delimiter (#{handler.config[:delimiter]}) instead of the subgroup delimiter (#{handler.config[:subgroup_delimiter]})"
}
end

def group_accommodates_subgroup?(groupdata, subgroupdata)
sg_max_length = subgroupdata.values.map(&:flatten).map(&:length).max
sg_max_length <= groupdata.length ? true : false
end

def map_subgroup(xpath, xphash, targetnode, thisdata)
parent_path = xphash[:parent]
parent_set = @doc.xpath("//#{parent_path}")
Expand All @@ -143,8 +173,16 @@ def map_subgroup(xpath, xphash, targetnode, thisdata)
groups[i] = { parent: p, data: {} }
end

add_uneven_subgroup_warning(parent_path: parent_path,
intervening_path: subgroup_path,
subgroup: subgroup) unless even_subgroup_field_values?(thisdata)
add_too_many_subgroups_warning(parent_path: parent_path,
intervening_path: subgroup_path,
subgroup: subgroup) unless group_accommodates_subgroup?(groups, thisdata)

thisdata.each do |f, v|
v.each_with_index do |val, i|
next if groups[i].nil?
groups[i][:data][f] = val
end
end
Expand Down
4 changes: 4 additions & 0 deletions lib/collectionspace/mapper/data_prepper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def transform_date_fields

def handle_term_fields
@xphash.each{ |xpath, hash| do_term_handling(xpath, hash) }
@response.warnings.flatten!
@response.errors.flatten!
@response.transformed_data
end

Expand Down Expand Up @@ -178,6 +180,8 @@ def do_term_handling(xpath, xphash)
config: @config)
@response.transformed_data[column] = th.result
@response.terms << th.terms
@response.warnings << th.warnings unless th.warnings.empty?
@response.errors << th.errors unless th.errors.empty?
end
end

Expand Down
54 changes: 41 additions & 13 deletions lib/collectionspace/mapper/term_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
module CollectionSpace
module Mapper
class TermHandler
attr_reader :result, :terms
attr_reader :result, :terms, :warnings, :errors,
:column, :source_type, :type, :subtype
attr_accessor :value
def initialize(mapping:, data:, client:, cache:, config:)
@mapping = mapping
@column = mapping[:datacolumn]
Expand All @@ -23,6 +25,8 @@ def initialize(mapping:, data:, client:, cache:, config:)
@type = 'vocabularies'
@subtype = @mapping[:transforms][:vocabulary]
end
@warnings = []
@errors = []
handle_terms
end

Expand All @@ -37,12 +41,14 @@ def handle_terms
end

def handle_term(val)
@value = val

return '' if val.blank?
added = false

term_report = {
category: @source_type,
field: @column
category: source_type,
field: column
}

if in_cache?(val)
Expand All @@ -63,44 +69,66 @@ def handle_term(val)

unless added
refname_obj = CollectionSpace::Mapper::Tools::RefName.new(
source_type: @source_type,
type: @type,
subtype: @subtype,
source_type: source_type,
type: type,
subtype: subtype,
term: val,
cache: @cache)
@terms << term_report.merge({ found: false, refname: refname_obj })
@cache.put(@type, @subtype, val, refname_obj.urn)
@cache.put(type, subtype, val, refname_obj.urn)
refname_urn = refname_obj.urn
end
refname_urn
end

def in_cache?(val)
@cache.exists?(@type, @subtype, val)
@cache.exists?(type, subtype, val)
end

def cached_term(val)
@cache.get(@type, @subtype, val, search: false)
@cache.get(type, subtype, val, search: false)
end

def searched_term(val)
begin
response = @client.find(
type: @type,
subtype: @subtype,
type: type,
subtype: subtype,
value: val,
field: search_field
)
rescue StandardError => e
puts e.message
else
response.parsed.dig('abstract_common_list', 'list_item', 'refName')
response_term_refname(response)
end
end

def response_term_refname(response)
term_ct = response.parsed.dig('abstract_common_list', 'totalItems')
return nil if term_ct.nil?

if term_ct.to_i == 1
refname = response.parsed.dig('abstract_common_list', 'list_item', 'refName')
elsif term_ct.to_i > 1
rec = response.parsed.dig('abstract_common_list', 'list_item')[0]
using_uri = "#{@client.config.base_uri}#{rec['uri']}"
refname = rec['refName']
warnings << {
category: :multiple_records_found_for_term,
field: column,
type: type,
subtype: subtype,
value: value,
message: "#{term_ct} records found. Using #{using_uri}"
}
end
refname
end

def search_field
begin
field = CollectionSpace::Service.get(type: @type)[:term]
field = CollectionSpace::Service.get(type: type)[:term]
rescue StandardError => e
puts e.message
else
Expand Down
2 changes: 1 addition & 1 deletion lib/collectionspace/mapper/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module CollectionSpace
module Mapper
VERSION = "2.1.4"
VERSION = "2.1.5"
end
end
49 changes: 48 additions & 1 deletion spec/collectionspace/mapper/data_mapper_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,52 @@
before(:all) do
@config = CollectionSpace::Mapper::DEFAULT_CONFIG
end

context 'core profile' do
before(:all) do
@client = core_client
@cache = core_cache
populate_core(@cache)
end
context 'collectionobject record' do
before(:all) do
@collectionobject_mapper = get_json_record_mapper(path: 'spec/fixtures/files/mappers/release_6_1/core/core_6_1_0-collectionobject.json')
@handler = CollectionSpace::Mapper::DataHandler.new(record_mapper: @collectionobject_mapper, client: @client, cache: @cache, config: @config)
end
context 'overflow subgroup record with uneven subgroup values' do
before(:all) do
@datahash = get_datahash(path: 'spec/fixtures/files/datahashes/core/collectionobject2.json')
@prepper = CollectionSpace::Mapper::DataPrepper.new(@datahash, @handler)
@mapper = CollectionSpace::Mapper::DataMapper.new(@prepper.prep, @handler, @prepper.xphash)
@mapped_doc = remove_namespaces(@mapper.response.doc)
@mapped_xpaths = list_xpaths(@mapped_doc)
@fixture_doc = get_xml_fixture('core/collectionobject2.xml')
@fixture_xpaths = test_xpaths(@fixture_doc, @handler.mapper[:mappings])
end
it 'mapper response includes overflow subgroup warning' do
w = @mapper.response.warnings.any?{ |w| w[:category] == :subgroup_contains_data_for_nonexistent_groups }
expect(w).to be true
end
it 'mapper response includes uneven subgroup values warning' do
w = @mapper.response.warnings.any?{ |w| w[:category] == :uneven_subgroup_field_values }
expect(w).to be true
end
it 'does not map unexpected fields' do
diff = @mapped_xpaths - @fixture_xpaths
expect(diff).to eq([])
end

it 'maps as expected' do
@fixture_xpaths.each do |xpath|
fixture_node = standardize_value(@fixture_doc.xpath(xpath).text)
mapped_node = standardize_value(@mapped_doc.xpath(xpath).text)
expect(mapped_node).to eq(fixture_node)
end
end
end
end
end

context 'lhmc profile' do
before(:all) do
@client = lhmc_client
Expand Down Expand Up @@ -45,7 +91,8 @@
end
end
end
end
end


context 'botgarden profile' do
before(:all) do
Expand Down
9 changes: 9 additions & 0 deletions spec/fixtures/files/datahashes/core/collectionobject2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"objectNumber": "21CS.001.0002",
"measuredPart": "unframed",
"dimensionSummary": "8.5 x 11",
"dimension": "height| width",
"measuredByOrganization": "Admirable Corporation",
"value": "8.5| 11",
"measurementUnit": "inches"
}
2 changes: 2 additions & 0 deletions spec/fixtures/files/xml/core/collectionobject2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<document name="collectionobjects"><ns2:collectionspace_core xmlns:ns2="http://collectionspace.org/collectionspace_core/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><createdAt>2021-02-12T00:31:59.813Z</createdAt><updatedBy>[email protected]</updatedBy><createdBy>[email protected]</createdBy><workflowState>project</workflowState><tenantId>1</tenantId><refName>urn:cspace:core.collectionspace.org:collectionobjects:id(98e3dcf4-2ae5-4c14-8e02)'21CS.001.0002'</refName><uri>/collectionobjects/98e3dcf4-2ae5-4c14-8e02</uri><updatedAt>2021-02-12T00:34:42.786Z</updatedAt></ns2:collectionspace_core><ns2:collectionobjects_common xmlns:ns2="http://collectionspace.org/services/collectionobject" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><fieldCollectionDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></fieldCollectionDateGroup><owners/><fieldCollectionMethods/><contentActivities/><objectProductionDateGroupList><objectProductionDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></objectProductionDateGroup></objectProductionDateGroupList><contentConcepts/><assocEventPlaces/><assocActivityGroupList/><objectProductionReasons/><assocPlaceGroupList/><objectNameList/><styles/><materialGroupList/><assocObjectGroupList/><fieldCollectors/><responsibleDepartments/><textualInscriptionGroupList><textualInscriptionGroup><inscriptionContentScript/><inscriptionContentInscriber/><inscriptionContentTransliteration/><inscriptionContentType/><inscriptionContentDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></inscriptionContentDateGroup><inscriptionContent/><inscriptionContentTranslation/><inscriptionContentInterpretation/><inscriptionContentPosition/><inscriptionContentLanguage/><inscriptionContentMethod/></textualInscriptionGroup></textualInscriptionGroupList><objectNumber>21CS.001.0002</objectNumber><fieldColEventNames/><colors/><contentScripts/><objectProductionPlaceGroupList/><referenceGroupList/><assocDateGroupList><assocDateGroup><assocDateNote/><assocStructuredDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></assocStructuredDateGroup><assocDateType/></assocDateGroup></assocDateGroupList><briefDescriptions/><contentLanguages/><assocPersonGroupList/><titleGroupList/><assocConceptGroupList/><usageGroupList/><assocCulturalContextGroupList/><otherNumberList/><objectProductionPeopleGroupList/><assocPeopleGroupList/><assocEventOrganizations/><contentPeoples/><namedCollections/><measuredPartGroupList><measuredPartGroup><dimensionSummary>8.5 x 11</dimensionSummary><measuredPart>unframed</measuredPart><dimensionSubGroupList><dimensionSubGroup><dimensionNote/><valueQualifier/><valueDate/><measuredBy>urn:cspace:core.collectionspace.org:orgauthorities:name(organization):item:name(AdmirableCorporation1613089843473)'Admirable Corporation'</measuredBy><dimension>height</dimension><value>8.5</value><measurementMethod/><measurementUnit>inches</measurementUnit></dimensionSubGroup></dimensionSubGroupList></measuredPartGroup></measuredPartGroupList><contentObjectGroupList/><ownershipDateGroupList><ownershipDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></ownershipDateGroup></ownershipDateGroupList><technicalAttributeGroupList/><nonTextualInscriptionGroupList><nonTextualInscriptionGroup><inscriptionDescriptionDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></inscriptionDescriptionDateGroup><inscriptionDescription/><inscriptionDescriptionInscriber/><inscriptionDescriptionInterpretation/><inscriptionDescriptionPosition/><inscriptionDescriptionMethod/><inscriptionDescriptionType/></nonTextualInscriptionGroup></nonTextualInscriptionGroupList><assocEventPersons/><objectProductionPersonGroupList/><contentPersons/><fieldCollectionSources/><contentEventNameGroupList/><ownersReferences/><viewersReferences/><objectProductionOrganizationGroupList/><computedCurrentLocation>urn:cspace:indeterminate:locationauthorities:name(indeterminate):item:name(indeterminate)'~Indeterminate Location~'</computedCurrentLocation><publishToList><publishTo/></publishToList><assocOrganizationGroupList/><contentOrganizations/><contentPlaces/><contentOtherGroupList/><contentDateGroup><scalarValuesComputed>false</scalarValuesComputed><dateEarliestSingleCertainty/><dateEarliestSingleQualifierUnit/><dateDisplayDate/><dateLatestScalarValue/><dateEarliestSingleQualifierValue/><datePeriod/><dateLatestEra/><dateEarliestSingleDay/><dateEarliestSingleQualifier/><dateEarliestSingleYear/><dateLatestCertainty/><dateAssociation/><dateLatestDay/><dateEarliestSingleMonth/><dateEarliestSingleEra/><dateLatestYear/><dateLatestQualifierUnit/><dateNote/><dateLatestQualifierValue/><dateLatestQualifier/><dateEarliestScalarValue/><dateLatestMonth/></contentDateGroup><objectStatusList/><objectComponentGroupList/><comments/><assocEventPeoples/><techniqueGroupList/><inventoryStatusList><inventoryStatus/></inventoryStatusList><contentPositions/><forms/></ns2:collectionobjects_common><ns2:account_permission xmlns:ns2="http://collectionspace.org/services/authorization"><account><accountId>d884e854-a10b-49c8-87e7-e7a900ffdc52</accountId><screenName>Administrator</screenName><userId>[email protected]</userId><tenantId>1</tenantId></account></ns2:account_permission></document>

0 comments on commit fb841a7

Please sign in to comment.