Skip to content

Commit

Permalink
spatial coverage metadata parsing for ISO19XXX, EML and DataCite see #…
Browse files Browse the repository at this point in the history
  • Loading branch information
huberrob committed Oct 24, 2024
1 parent 95da507 commit 4727968
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 20 deletions.
22 changes: 21 additions & 1 deletion fuji_server/helper/metadata_collector_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,6 @@ def get_mapped_xml_metadata(self, tree, mapping):
res = dict()
# make sure related_resources is not listed in the mapping dict; related_resource_Reltype has to be used instead
res["related_resources"] = []

for prop in mapping:
res[prop] = []
if isinstance(mapping.get(prop).get("path"), list):
Expand Down Expand Up @@ -402,4 +401,25 @@ def get_mapped_xml_metadata(self, tree, mapping):
res.pop("object_content_identifier_size", None)
res.pop("object_content_identifier_url", None)
res.pop("object_content_identifier_service", None)
if res.get("coverage_spatial_coordinates") or res.get("coverage_spatial_names"):
res["coverage_spatial"] = []
if not isinstance(res["coverage_spatial_coordinates"], list):
res["coverage_spatial_coordinates"] = [res["coverage_spatial_coordinates"]]
ci = 0
for spatial_info in res["coverage_spatial_coordinates"] or res.get("coverage_spatial_names"):
spatial_coordinates = None
spatial_name = None
if res.get("coverage_spatial_coordinates"):
if ci < len(res["coverage_spatial_coordinates"]):
spatial_coordinates = res["coverage_spatial_coordinates"][ci]
if res.get("coverage_spatial_name"):
if ci < len(res["coverage_spatial_name"]):
spatial_name = res["coverage_spatial_name"][ci]
res["coverage_spatial"].append(
{"coordinates": str(spatial_coordinates).split(" "), "name": spatial_name}
)
ci += 1
res.pop("coverage_spatial_coordinates", None)
res.pop("coverage_spatial_name", None)

return res
55 changes: 36 additions & 19 deletions fuji_server/helper/metadata_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ def flip_dict(dict_to_flip):
"submitted_date: dates[?dateType == 'Submitted'].date,"
"object_content_identifier: {url: contentUrl} , "
"access_level: rightsList[*].rightsUri || rightsList[*].rights, "
"language: language }"
"language: language,"
"coverage_spatial: geoLocations[*].{coordinates: geoLocationBox.*[] || geoLocationPoint.*[] || geoLocationPolygons[*].polygonPoints[].*[],name: geoLocationPlace }}"
)
#'related_resources: relatedIdentifiers[*].[relatedIdentifier,relationType]}'

Expand Down Expand Up @@ -378,6 +379,20 @@ def flip_dict(dict_to_flip):
"license": {"path": ["./{*}rightsList/{*}rights", "./{*}rightsList/{*}rights@@rightsURI"]},
"access_level": {"path": ["./{*}rightsList/{*}rights", "./{*}rightsList/{*}rights@@rightsURI"]},
"language": {"path": "./{*}language"},
"coverage_spatial_coordinates": {
"path": [
"./{*}geoLocations/{*}geoLocation/{*}geoLocationPoint",
"./{*}geoLocations/{*}geoLocation/{*}geoLocationBox",
"./{*}geoLocations/{*}geoLocation/{*}geoLocationPolygon",
]
},
"coverage_spatial_name": {
"path": [
"./{*}geoLocations/{*}geoLocationPlace",
"./{*}geoLocations/{*}geoLocationPlace",
"./{*}geoLocations/{*}geoLocationPlace",
]
},
}

XML_MAPPING_METS = {
Expand Down Expand Up @@ -442,6 +457,10 @@ def flip_dict(dict_to_flip):
"path": "./{*}dataset/{*}dataTable/{*}physical/{*}distribution/{*}online/{*}size"
},
"language": {"path": "./{*}dataset/{*}language"},
"coverage_spatial_coordinates": {
"path": "./{*}dataset/{*}coverage/{*}geographicCoverage/{*}boundingCoordinates"
},
"coverage_spatial_name": {"path": "./{*}dataset/{*}coverage/{*}geographicCoverage/{*}geographicDescription"},
}
# CLARIN CMDI
XML_MAPPING_CMD = {
Expand Down Expand Up @@ -562,6 +581,9 @@ def flip_dict(dict_to_flip):
"object_content_identifier_type": {"path": ".//{*}fileDscr/{*}fileTxt/{*}fileType"},
"measured_variable": {"path": "./{*}dataDscr/{*}var@@name"},
"language": {"path": ["./{*}codeBook@@lang", "./{*}stdyDscr/{*}citation/{*}titlStmt/{*}titl@@xml:lang"]},
# https://ddialliance.org/Specification/DDI-Codebook/2.1/DTD/Documentation/version2-1-all.html#2.0
# coverage_spatial_name: geogCover
# coverage_spatial_coordinates: geoBndBox
}
XML_MAPPING_DIF = {
"object_identifier": {"path": "./{*}Dataset_Citation/{*}Persistent_Identifier"},
Expand Down Expand Up @@ -666,24 +688,6 @@ def flip_dict(dict_to_flip):
},
],
},
"""
"object_content_identifier_url": {
"path": [
"./{*}distributionInfo/{*}MD_Distribution//{*}CI_OnlineResource/{*}linkage/{*}URL",
#"./{*}distributionInfo/{*}MD_Distribution//{*}CI_OnlineResource[{*}protocol]/{*}linkage/{*}URL",
"./{*}distributionInfo/{*}MD_Distribution/{*}transferOptions/{*}MD_DigitalTransferOptions/{*}onLine/{*}CI_OnlineResource/{*}linkage/{*}URL"
]
},
"object_content_identifier_type": {
"path": [
"./{*}distributionInfo/{*}MD_Distribution//{*}CI_OnlineResource/{*}applicationProfile/{*}Anchor",
"./{*}distributionInfo/{*}MD_Distribution/{*}transferOptions/{*}MD_DigitalTransferOptions/{*}onLine/{*}CI_OnlineResource/{*}applicationProfile/{*}Anchor"
]
},
"object_content_identifier_service": {
"path": "./{*}distributionInfo/{*}MD_Distribution//{*}CI_OnlineResource/{*}protocol/{*}Anchor@@xlink:href"
},
"""
"measured_variable": {
"path": [
"./{*}contentInfo/{*}MD_CoverageDescription/{*}attributeDescription/{*}RecordType",
Expand Down Expand Up @@ -720,4 +724,17 @@ def flip_dict(dict_to_flip):
]
},
"language": {"path": "./{*}language/{*}LanguageCode@@codeListValue"},
"coverage_spatial_coordinates": {
"path": [
"./{*}identificationInfo//{*}geographicElement/{*}EX_GeographicBoundingBox",
"./{*}identificationInfo//{*}geographicElement/{*}gmd:EX_BoundingPolygon",
]
},
"coverage_spatial_name": {
"path": [
"./{*}identificationInfo//{*}geographicElement/{*}geographicIdentifier/{*}MD_Identifier/{*}code",
"./{*}identificationInfo//{*}geographicElement/{*}geographicIdentifier/{*}MD_Identifier/{*}code",
]
},
# "./{*}identificationInfo//{*}geographicElement//{*}posList"]
}

0 comments on commit 4727968

Please sign in to comment.