Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

oai: updated dcat and datacite serializers #1713

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion invenio_rdm_records/resources/serializers/datacite/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

"""DataCite based Schema for Invenio RDM Records."""

import json

from babel_edtf import parse_edtf
from edtf.parser.grammar import ParseException
from flask import current_app
Expand Down Expand Up @@ -73,18 +75,28 @@ def get_name_identifiers(self, obj):
"""Get name identifier list."""
serialized_identifiers = []
identifiers = obj["person_or_org"].get("identifiers", [])

for identifier in identifiers:
scheme = identifier["scheme"]
id_scheme = get_scheme_datacite(
scheme, "RDM_RECORDS_PERSONORG_SCHEMES", default=scheme
)

if id_scheme:

name_id = {
"nameIdentifier": identifier["identifier"],
"nameIdentifierScheme": id_scheme,
}

scheme_uri = ""
if scheme == "orcid":
scheme_uri = "http://orcid.org/"
elif scheme == "ror":
scheme_uri = "https://ror.org/"

if scheme_uri:
name_id["schemeUri"] = scheme_uri

serialized_identifiers.append(name_id)

return serialized_identifiers
Expand Down Expand Up @@ -247,6 +259,19 @@ def _merge_main_and_additional(self, obj, field, default_type=None):

result.append(item)

if field == "description":
# References
refs = obj["metadata"].get("references")
if refs:
result.append(
{
field: json.dumps(
{"references": [r["reference"] for r in refs]}
),
f"{field}Type": "Other",
}
)

return result or missing

def get_titles(self, obj):
Expand Down Expand Up @@ -563,6 +588,22 @@ def get_rights(self, obj):
entry["rightsUri"] = link
serialized_rights.append(entry)

# Adding access_status information

access_status = obj.get("access", {}).get("status", "")
if access_status == "metadata-only":
access_status = "closed"

access_right_formatted = access_status.capitalize() + " Access"
rights_uri = f"info:eu-repo/semantics/{access_status}Access"

access_right_serialized = {
"rights": access_right_formatted,
"rightsUri": rights_uri,
}

serialized_rights.append(access_right_serialized)

return serialized_rights if serialized_rights else missing

def get_funding(self, obj):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1655,7 +1655,10 @@
<xsl:otherwise>
<!--
<rdf:type rdf:resource="{$foaf}Agent"/>
-->
-->
<xsl:if test="$uri != ''">
<dct:identifier rdf:datatype="{$xsd}string"><xsl:value-of select="$nameIdentifier"/></dct:identifier>
Copy link
Member

@ptamarit ptamarit Apr 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

major: So far this file has been fully synchronized with a repository which is not maintained by us:
https://github.com/ec-jrc/datacite-to-dcat-ap/blob/master/datacite-to-dcat-ap.xsl

We need to decide if we want to:

  • Diverge from the upstream version
  • Propose a contribution to the upstream version (we recently contributed a bug fix)
  • Find another way to achieve the expected result in InvenioRDM.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After some discussions, the decision is that we can:

  1. Merge this change
  2. Propose a PR with the same change upstream on https://github.com/ec-jrc/datacite-to-dcat-ap/

</xsl:if>
<xsl:if test="$agentName != ''">
<foaf:name><xsl:value-of select="$agentName"/></foaf:name>
</xsl:if>
Expand Down
16 changes: 14 additions & 2 deletions tests/resources/serializers/test_datacite_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_datacite43_serializer(running_app, full_record_to_dict):
{
"nameIdentifier": "0000-0001-8135-3489",
"nameIdentifierScheme": "ORCID",
"schemeUri": "http://orcid.org/",
}
],
"nameType": "Personal",
Expand All @@ -144,6 +145,7 @@ def test_datacite43_serializer(running_app, full_record_to_dict):
{
"nameIdentifier": "0000-0001-8135-3489",
"nameIdentifierScheme": "ORCID",
"schemeUri": "http://orcid.org/",
}
],
"nameType": "Personal",
Expand All @@ -167,6 +169,10 @@ def test_datacite43_serializer(running_app, full_record_to_dict):
"descriptionType": "Abstract",
},
{"description": "Bla bla bla", "descriptionType": "Methods", "lang": "eng"},
{
"description": '{"references": ["Nielsen et al,.."]}',
"descriptionType": "Other",
},
],
"formats": ["application/pdf"],
"fundingReferences": [
Expand Down Expand Up @@ -224,6 +230,10 @@ def test_datacite43_serializer(running_app, full_record_to_dict):
"rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
},
{"rights": "No rightsUri license"},
{
"rights": "Embargoed Access",
"rightsUri": "info:eu-repo/semantics/embargoedAccess",
},
],
"schemaVersion": "http://datacite.org/schema/kernel-4",
"sizes": ["11 pages"],
Expand Down Expand Up @@ -275,7 +285,7 @@ def test_datacite43_xml_serializer(running_app, full_record_to_dict):
" <givenName>Lars Holm</givenName>\n"
" <familyName>Nielsen</familyName>\n"
" <nameIdentifier "
'nameIdentifierScheme="ORCID">0000-0001-8135-3489</nameIdentifier>\n'
'nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0001-8135-3489</nameIdentifier>\n'
" <affiliation>CERN</affiliation>\n"
" <affiliation>free-text</affiliation>\n"
" </creator>\n"
Expand Down Expand Up @@ -303,7 +313,7 @@ def test_datacite43_xml_serializer(running_app, full_record_to_dict):
" <givenName>Lars Holm</givenName>\n"
" <familyName>Nielsen</familyName>\n"
" <nameIdentifier "
'nameIdentifierScheme="ORCID">0000-0001-8135-3489</nameIdentifier>\n'
'nameIdentifierScheme="ORCID" schemeURI="http://orcid.org/">0000-0001-8135-3489</nameIdentifier>\n'
" <affiliation>CERN</affiliation>\n"
" <affiliation>TU Wien</affiliation>\n"
" </contributor>\n"
Expand Down Expand Up @@ -340,12 +350,14 @@ def test_datacite43_xml_serializer(running_app, full_record_to_dict):
'rightsURI="https://creativecommons.org/licenses/by/4.0/legalcode" '
'rightsIdentifierScheme="spdx" rightsIdentifier="cc-by-4.0">Creative Commons '
"Attribution 4.0 International</rights>\n"
' <rights rightsURI="info:eu-repo/semantics/embargoedAccess">Embargoed Access</rights>\n'
" </rightsList>\n"
" <descriptions>\n"
' <description descriptionType="Abstract">A description \n'
"with HTML tags</description>\n"
' <description descriptionType="Methods" xml:lang="eng">Bla bla '
"bla</description>\n"
' <description descriptionType="Other">{"references": ["Nielsen et al,.."]}</description>\n'
" </descriptions>\n"
" <geoLocations>\n"
" <geoLocation>\n"
Expand Down
9 changes: 9 additions & 0 deletions tests/resources/serializers/test_dcat_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_dcat_serializer(running_app, full_record_to_dict):
" <dct:creator>\n"
' <rdf:Description rdf:about="https://orcid.org/0000-0001-8135-3489">\n'
' <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>\n'
' <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-8135-3489</dct:identifier>\n'
" <foaf:name>Nielsen, Lars Holm</foaf:name>\n"
" <foaf:givenName>Lars Holm</foaf:givenName>\n"
" <foaf:familyName>Nielsen</foaf:familyName>\n"
Expand Down Expand Up @@ -107,6 +108,7 @@ def test_dcat_serializer(running_app, full_record_to_dict):
" <dct:contributor>\n"
' <rdf:Description rdf:about="https://orcid.org/0000-0001-8135-3489">\n'
' <rdf:type rdf:resource="http://xmlns.com/foaf/0.1/Person"/>\n'
' <dct:identifier rdf:datatype="http://www.w3.org/2001/XMLSchema#string">0000-0001-8135-3489</dct:identifier>\n'
" <foaf:name>Nielsen, Lars Holm</foaf:name>\n"
" <foaf:givenName>Lars Holm</foaf:givenName>\n"
" <foaf:familyName>Nielsen</foaf:familyName>\n"
Expand Down Expand Up @@ -181,6 +183,7 @@ def test_dcat_serializer(running_app, full_record_to_dict):
" <rdfs:label>Bla bla bla</rdfs:label>\n"
" </dct:ProvenanceStatement>\n"
" </dct:provenance>\n"
' <dct:description>{"references": ["Nielsen et al,.."]}</dct:description>\n'
" <dct:spatial>\n"
" <dct:Location>\n"
" <rdf:type "
Expand All @@ -199,6 +202,12 @@ def test_dcat_serializer(running_app, full_record_to_dict):
'rdf:datatype="http://www.opengis.net/ont/geosparql#geoJSONLiteral"><![CDATA[{"type":"Point","coordinates":[-32.94682,-60.63932]}]]></dcat:centroid>\n'
" </dct:Location>\n"
" </dct:spatial>\n"
' <dct:accessRights rdf:resource="http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC"/>\n'
" <dct:accessRights>\n"
' <dct:RightsStatement rdf:about="info:eu-repo/semantics/embargoedAccess">\n'
" <rdfs:label>Embargoed Access</rdfs:label>\n"
" </dct:RightsStatement>\n"
" </dct:accessRights>\n"
" <dcat:distribution>\n"
" <dcat:Distribution>\n"
" <dct:extent>\n"
Expand Down
10 changes: 10 additions & 0 deletions tests/services/pids/test_pids_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ def test_full_record_register(
{
"nameIdentifier": "0000-0001-8135-3489",
"nameIdentifierScheme": "ORCID",
"schemeUri": "http://orcid.org/",
}
],
"nameType": "Personal",
Expand All @@ -622,6 +623,7 @@ def test_full_record_register(
{
"nameIdentifier": "0000-0001-8135-3489",
"nameIdentifierScheme": "ORCID",
"schemeUri": "http://orcid.org/",
}
],
"nameType": "Personal",
Expand All @@ -645,6 +647,10 @@ def test_full_record_register(
"descriptionType": "Methods",
"lang": "eng",
},
{
"description": '{"references": ["Nielsen et ' 'al,.."]}',
"descriptionType": "Other",
},
],
"formats": ["application/pdf"],
"fundingReferences": [
Expand Down Expand Up @@ -702,6 +708,10 @@ def test_full_record_register(
"rightsIdentifierScheme": "spdx",
"rightsUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
},
{
"rights": " Access",
"rightsUri": "info:eu-repo/semantics/Access",
},
],
"schemaVersion": "http://datacite.org/schema/kernel-4",
"sizes": ["11 pages"],
Expand Down
Loading