From f4e5df0d7f042437cf34552b1c8badbf8855b394 Mon Sep 17 00:00:00 2001 From: huberrob Date: Wed, 25 Sep 2024 13:47:45 +0200 Subject: [PATCH] updated +- to pr480 --- fuji_server/data/repodois.yaml | 21 +- fuji_server/helper/fuji_vocab_builder.py | 415 ++++++++++++++ fuji_server/helper/fuji_vocab_helper.py | 9 + fuji_server/yaml/metrics_v0.4.yaml | 2 +- fuji_server/yaml/metrics_v0.6.yaml | 674 +++++++++++++++++++++++ 5 files changed, 1117 insertions(+), 4 deletions(-) create mode 100644 fuji_server/helper/fuji_vocab_builder.py create mode 100644 fuji_server/helper/fuji_vocab_helper.py create mode 100644 fuji_server/yaml/metrics_v0.6.yaml diff --git a/fuji_server/data/repodois.yaml b/fuji_server/data/repodois.yaml index db9707e2..35eb04f9 100644 --- a/fuji_server/data/repodois.yaml +++ b/fuji_server/data/repodois.yaml @@ -4,6 +4,7 @@ ardcx.curtin: https://doi.org/10.17616/R3WM02 ardcx.griffith: https://doi.org/10.17616/R3FG92 ardcx.usq: https://doi.org/10.17616/R36Q18 arfm.ufzdrp: https://doi.org/10.17616/R31NJN1M +awsod.roda: https://doi.org/10.17616/R3M91V awzy.fednnv: https://doi.org/10.17616/R3PM1K bamf.bamf: https://doi.org/10.17616/R31NJMZ5 bf.discover: https://doi.org/10.17616/R31NJMN0 @@ -48,6 +49,7 @@ bl.ukda: https://doi.org/10.17616/R3088K bmbf.hirzqt: https://doi.org/10.17616/R3CK9G brown.bdr: https://doi.org/10.17616/R3193B brvz.rdr: https://doi.org/10.17616/R31NJN5S +bvbw.dhbqgy: https://doi.org/10.17616/R3H35C caltech.data: https://doi.org/10.17616/R3SW99 carl.frdr: https://doi.org/10.17616/R3X50W cbg.datasets: https://doi.org/10.17616/R3PP7J @@ -90,6 +92,7 @@ dkrz.wdcc: https://doi.org/10.17616/R3989R dryad.dryad: https://doi.org/10.17616/R34S33 dzhw.fdz-dzhw: https://doi.org/10.17616/R3BR0G elsevier.md: https://doi.org/10.17616/R3DD11 +epua.rdcrki: https://doi.org/10.17616/R3K31G esdis.ornl: https://doi.org/10.17616/R3SG61 estdoi.keel: https://doi.org/10.17616/R3P636 estdoi.qdb: https://doi.org/10.17616/R31NJMVF @@ -131,8 +134,8 @@ gesis.dezim: 
https://doi.org/10.17616/R31NJMNK gesis.gesis: https://doi.org/10.17616/R3BB70 gesis.icpsr: https://doi.org/10.17616/R3BC8Q gesis.indepth: https://doi.org/10.17616/R3432T +gesis.iqb: https://doi.org/10.17616/R3M30D gesis.neps: https://doi.org/10.17616/R35P7W -gesis.rki: https://doi.org/10.17616/R3K31G gesis.share: https://doi.org/10.17616/R3BK5N gesis.sodanet: https://doi.org/10.17616/R38G7R gesis.srda: https://doi.org/10.17616/R31NJMLA @@ -141,6 +144,7 @@ gesis.zpid: https://doi.org/10.17616/R3FS48 hain.njltcl: https://doi.org/10.17616/R31NJNAM heliophy.spdf: https://doi.org/10.17616/R3P301 hlqc.znxeli: https://doi.org/10.17616/R31NJMJP +hyfx.gqlzxo: https://doi.org/10.17616/R31NJNDH ieee.dataport: https://doi.org/10.17616/R3H77H iiasa.dare: https://doi.org/10.17616/R31NJMH0 illinois.databank: https://doi.org/10.17616/R3C93F @@ -150,7 +154,7 @@ inist.humanum: https://doi.org/10.17616/R3JM1B inist.ifremer: https://doi.org/10.17616/R31NJMHX inist.ifsttar: https://doi.org/10.17616/R31NJMKM inist.ill: https://doi.org/10.17616/R33H18 -inist.inra: https://doi.org/10.17616/R3DJ4J +inist.osuna: https://doi.org/10.17616/R31NJNKA inist.otelo: https://doi.org/10.17616/R3F19K inist.resif: https://doi.org/10.17616/R37Q06 iris.iris: https://doi.org/10.17616/R3X607 @@ -161,6 +165,7 @@ jbru.bbees: https://doi.org/10.17616/R31NJNEN jcvi.eivbwb: https://doi.org/10.17616/R30P93 jcvi.gxpwaq: https://doi.org/10.17616/R3S634 kaggle.kaggle: https://doi.org/10.17616/R32N5V +kim.colabfit: https://doi.org/10.17616/R31NJNJL kim.openkim: https://doi.org/10.17616/R3SK8T kmot.rdr: https://doi.org/10.17616/R31NJMN6 ktsw.aezvvv: https://doi.org/10.17616/R31NJMVX @@ -173,6 +178,7 @@ mdw.repository: https://doi.org/10.17616/R30M00 mit.physio: https://doi.org/10.17616/R3D06S mlbs.skuxgs: https://doi.org/10.17616/R3ZP8D nasapds.nasapds: https://doi.org/10.17616/R37593 +nbqj.daks: https://doi.org/10.17616/R31NJMZS nkn.nkn: https://doi.org/10.17616/R3JK91 nlqq.xoncsf: https://doi.org/10.17616/R31NJMYB 
noaa.ncei: https://doi.org/10.17616/R3362J @@ -192,8 +198,15 @@ psu.datacom: https://doi.org/10.17616/R31NJMGV psu.scholar: https://doi.org/10.17616/R34W4H pu.dataspace: https://doi.org/10.17616/R33B6F purdue.purduelib: https://doi.org/10.17616/R3V90N +rdg.prod: https://doi.org/10.17616/R31NJN8R +repod.dbuw: https://doi.org/10.17616/R31NJNHR +repod.mxrdr: https://doi.org/10.17616/R31NJMYU +repod.rds: https://doi.org/10.17616/R31NJMYP rg.rg: https://doi.org/10.17616/R36H16 rice.kinder: https://doi.org/10.17616/R3GB90 +rpak.jpbpto: https://doi.org/10.17616/R31NJNLU +rpak.tjjamp: https://doi.org/10.17616/R31NJNLW +rpak.zvrzin: https://doi.org/10.17616/R31NJNLV sagebio.synapse: https://doi.org/10.17616/R3B934 si.cda: https://doi.org/10.17616/R3201S si.si: https://doi.org/10.17616/R3W49N @@ -224,9 +237,11 @@ tib.kit4radar: https://doi.org/10.17616/R31NJMVU tib.ldeo: https://doi.org/10.17616/R3PV10 tib.luis: https://doi.org/10.17616/R3C519 tib.radar: https://doi.org/10.17616/R3ZX96 +tib.repod: https://doi.org/10.17616/R3JS93 tib.tuhh: https://doi.org/10.17616/R31NJML0 tib.ub-hro: https://doi.org/10.17616/R31NJMQH tib.ubbs: https://doi.org/10.17616/R31NJMTU +tib.ubmr: https://doi.org/10.17616/R31NJMTD tib.ukon: https://doi.org/10.17616/R3M024 tib.zih: https://doi.org/10.17616/R31NJMHY tuw.tethys: https://doi.org/10.17616/R31NJMU0 @@ -237,7 +252,7 @@ umn.drum: https://doi.org/10.17616/R30D19 umn.ipums: https://doi.org/10.17616/R3ZS59 undr.undr: https://doi.org/10.17616/R31NJN9K unimelb.repo1: https://doi.org/10.17616/R3HH10 -unm.dataone: https://doi.org/10.17616/R3101G +unisalzb.anc: https://doi.org/10.17616/R31NJNKJ usda.usda: https://doi.org/10.17616/R3G051 usgs.prod: https://doi.org/10.17616/R33S3S uva.libra: https://doi.org/10.17616/R3TS86 diff --git a/fuji_server/helper/fuji_vocab_builder.py b/fuji_server/helper/fuji_vocab_builder.py new file mode 100644 index 00000000..b4b689aa --- /dev/null +++ b/fuji_server/helper/fuji_vocab_builder.py @@ -0,0 +1,415 @@ +# 
SPDX-FileCopyrightText: 2020 PANGAEA (https://www.pangaea.de/) +# +# SPDX-License-Identifier: MIT + +import json +import os + +from fuji_server.helper.identifier_helper import IdentifierHelper +from fuji_server.helper.linked_vocab_helper import LinkedVocabHelper +from fuji_server.helper.metadata_collector import ( + MetadataFormats, + MetadataOfferingMethods, + metadata_mapper, +) + + +class fuji_knowledge_base: + def __init__(self, root=""): + self.namespace = "https://f-uji.net/vocab" + self.vocabdict = { + self.namespace + "/metadata": { + "uri": self.namespace + "/metadata", + "label": "Metadata", + "description": "Everything about metadata", + "source": "f-uji.net", + }, + self.namespace + "/data": { + "uri": self.namespace + "/data", + "label": "Data", + "description": "Everything about data, usually the item(s) which is/are described by metadata", + "source": "f-uji.net", + }, + self.namespace + "/identifier": { + "uri": self.namespace + "/identifier", + "label": "Digital Identifier", + "description": "A name or identifier which identifies a digital object", + "source": "f-uji.net", + }, + self.namespace + "/access_condition": { + "uri": self.namespace + "/access_condition", + "label": "Access Conditions", + "description": "Information concerning the accessibility of resources, especially about existing restrictions.", + "source": "f-uji.net", + }, + } + self.root = root + base_path = os.path.abspath(os.path.dirname(__file__)) + self.fuji_data_path = os.path.join(base_path, "..", "data") + + def get_vocab_dict(self): + self.add_transport_protocol() + self.add_identifiers() + self.add_metadata_properties() + self.add_metadata_methods() + self.add_metadata_formats() + self.add_metadata_exchange_method() + self.add_metadata_standards() + self.add_semantic_resources() + self.add_licenses() + self.add_file_types() + self.add_data_properties() + self.add_access_rights() + return self.vocabdict + # self.add_metadata_methods('metadata/method') + + def 
add_transport_protocol(self): + parentkey = "transport_protocol" + prdict = {} + with open(os.path.join(self.fuji_data_path, "standard_uri_protocols.json"), encoding="utf-8") as acf: + protocol_dict = json.load(acf) + for protocolid, protocol in protocol_dict.items(): + prdict[self.namespace + "/" + parentkey + "/" + protocolid] = { + "label": protocol.get("name"), + "uri": self.namespace + "/" + parentkey + "/" + protocolid, + "broader": self.namespace + "/" + parentkey, + } + + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/transport_protocol", + "label": "Transport Protocol", + "description": "Transport protocols used to handle data or metadata ideally via the internet.", + "source": "f-uji.net", + } + self.vocabdict.update(prdict) + return prdict + + def add_access_rights(self): + parentkey = "access_condition" + ac_dict = {} + with open(os.path.join(self.fuji_data_path, "access_rights.json"), encoding="utf-8") as acf: + access_rights_dict = json.load(acf) + for access_source in access_rights_dict.values(): + ac_dict[self.namespace + "/" + parentkey + "/" + access_source.get("id")] = { + "label": access_source.get("label"), + "uri": self.namespace + "/" + parentkey + "/" + access_source.get("id"), + "identifier": access_source.get("identifier"), + "broader": self.namespace + "/" + parentkey, + "source": access_source.get("source"), + } + for access_info in access_source.get("members"): + ac_dict[ + self.namespace + "/" + parentkey + "/" + access_source.get("id") + "/" + access_info.get("id") + ] = { + "label": access_info.get("label"), + "uri": self.namespace + + "/" + + parentkey + + "/" + + access_source.get("id") + + "/" + + access_info.get("id"), + "identifier": {"value": access_info.get("uri"), "type": "homepage"}, + "broader": self.namespace + "/" + parentkey + "/" + access_source.get("id"), + "source": access_info.get("source"), + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + 
parentkey, + "label": "Access Conditions", + "description": "Information concerning the accessibility of resources, especially about existing restrictions.", + "source": "f-uji.net", + } + self.vocabdict.update(ac_dict) + return ac_dict + + # serialisation: html, json, json-ld, rdf (ttl, n3), xml + def add_identifiers(self): + parentkey = "identifier/persistent" + id_dict = {} + identifiers = IdentifierHelper.VALID_PIDS + for idk, idv in identifiers.items(): + id_dict[self.namespace + "/" + parentkey + "/" + idk] = { + "label": idv.get("label"), + "uri": self.namespace + "/" + parentkey + "/" + idk, + "broader": self.namespace + "/" + parentkey, + "source": idv.get("source"), + } + + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Persistent Identifier", + "broader": self.namespace + "/identifier", + "source": "f-uji.net", + } + self.vocabdict.update(id_dict) + + def add_data_properties(self): + parentkey = "data/property/" + self.vocabdict[self.namespace + "/" + parentkey + "/size"] = { + "uri": self.namespace + "/" + parentkey, + "label": "Data (File) Size", + "broader": self.namespace + "/" + parentkey, + "source": "f-uji.net", + } + self.vocabdict[self.namespace + "/" + parentkey + "/url"] = { + "uri": self.namespace + "/" + parentkey, + "label": "Data (File) Identifier (URI)", + "broader": self.namespace + "/" + parentkey, + "source": "f-uji.net", + } + self.vocabdict[self.namespace + "/" + parentkey + "/type"] = { + "uri": self.namespace + "/" + "/data", + "alias": "url", + "label": "Data Type (Mime Type)", + "broader": self.namespace + "/data", + "narrower": self.namespace + "/data/format", + "source": "f-uji.net", + } + + def add_metadata_properties(self): + parentkey = "metadata/property" + properties_dict = {} + properties = metadata_mapper.Mapper.REFERENCE_METADATA_LIST.value + if properties: + for propkey, propvalue in properties.items(): + propvalue["source"] = "f-uji.net" + propvalue["uri"] = 
str(self.namespace) + "/" + parentkey + "/" + str(propkey) + propvalue["broader"] = self.namespace + "/" + parentkey + properties_dict[self.namespace + "/" + parentkey + "/" + propkey] = propvalue + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Metadata Property", + "description": "", + "broader": self.namespace + "/metadata", + "source": "f-uji.net", + } + self.vocabdict.update(properties_dict) + + def add_relation_types(self): + reltype_dict = {} + parentkey = "relation_type" + for rel in metadata_mapper.Mapper.DC_MAPPING.value.get("related_resources"): + reltype_dict[self.namespace + "/" + parentkey + "/" + rel] = { + "label": rel, + "broader": self.namespace + "/" + parentkey, + "uri": self.namespace + "/" + parentkey + "/" + rel, + "sameAs": "http://purl.org/dc/terms/" + rel, + "source": "dublincore.org", + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Relation Types", + "description": "Terms describing the type of relation between digital entities. This list is based on but not restricted to those defined by dublin core. 
Internally, relation types other than DC terms are mappred to dc terms", + "broader": self.namespace, + "source": "f-uji.net", + } + self.vocabdict.update(reltype_dict) + + def add_metadata_formats(self): + parentkey = "metadata/format" + metamethod_dict = {} + for method in MetadataFormats: + metamethod_dict[self.namespace + "/" + parentkey + "/" + method.acronym()] = { + "label": method.value.get("label"), + "broader": self.namespace + "/" + parentkey, + "uri": self.namespace + "/" + parentkey + "/" + method.acronym(), + "source": "f-uji.net", + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Metadata Format", + "description": "Formats in which metadata van be serialised such as XML, RDF etc.", + "broader": self.namespace + "/metadata", + "source": "f-uji.net", + } + self.vocabdict.update(metamethod_dict) + + def add_metadata_methods(self): + parentkey = "metadata/offering_method" + metamethod_dict = {} + for method in MetadataOfferingMethods: + metamethod_dict[self.namespace + "/" + parentkey + "/" + method.acronym()] = { + "label": method.value.get("label"), + "broader": self.namespace + "/" + parentkey, + "uri": self.namespace + "/" + parentkey + "/" + method.acronym(), + "source": "f-uji.net", + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Metadata Offering Method", + "description": "", + "broader": self.namespace + "/metadata", + "source": "f-uji.net", + } + self.vocabdict.update(metamethod_dict) + + def add_metadata_standards(self): + parentkey = "metadata/standard" + metastandards_dict = {} + with open(os.path.join(self.fuji_data_path, "metadata_standards.json")) as mdf: + mddict = json.load(mdf) + for mk, mv in mddict.items(): + mk = mk.replace(".yml", "") + sources = [] + identifiers = [] + for ident in mv.get("identifier"): + if ident.get("type") in ["namespace", "schema", "homepage"]: + identifiers.append(ident) + if 
ident.get("value"): + if "msc:" in ident.get("value"): + sources.append("rd-alliance.org") + elif "fairsharing.org" in ident.get("value"): + sources.append("fairsharing.org") + elif "www.dcc.ac" in ident.get("value"): + sources.append("dcc.ac") + metastandards_dict[self.namespace + "/" + parentkey + "/" + mk] = { + "label": mv.get("title"), + "uri": self.namespace + "/" + parentkey + "/" + mk, + "broader": self.namespace + "/" + parentkey, + "identifier": identifiers, + "field_of_science": mv.get("field_of_science"), + "source": sources, + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Metadata Standard", + "description": "", + "broader": self.namespace + "/metadata", + "source": "f-uji.net", + } + self.vocabdict.update(metastandards_dict) + + def add_semantic_resources(self): + parentkey = "semantic_resource" + semanticdict = {} + lov_helper = LinkedVocabHelper() + lov_helper.set_linked_vocab_dict() + linked_vocab_index = lov_helper.linked_vocab_dict + for lovk, lovv in linked_vocab_index.items(): + identifiers = [] + if lovv.get("uri_format"): + namespace = str(lovv.get("uri_format")).replace("$1", "") + identifiers.append({"type": "namespace", "value": namespace}) + if lovv.get("homepage"): + homepage = str(lovv.get("homepage")) + identifiers.append({"type": "homepage", "value": homepage}) + semanticdict[self.namespace + "/" + parentkey + "/" + lovk] = { + "uri": self.namespace + "/" + parentkey + "/" + lovk, + "broader": self.namespace + "/" + parentkey, + "label": lovv.get("name"), + "identifier": identifiers, + "source": lovv.get("source"), + "field_of_science": lovv.get("subjects"), + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Semantic Resource", + "description": "", + "source": "f-uji.net", + } + self.vocabdict.update(semanticdict) + + def add_licenses(self): + parentkey = "license" + license_dict = {} + with 
open(os.path.join(self.fuji_data_path, "licenses.json")) as lcd: + license_list = json.load(lcd) + for lic in license_list: + license_dict[self.namespace + "/" + parentkey + "/" + lic.get("licenseId")] = { + "uri": self.namespace + "/" + parentkey + "/" + lic.get("licenseId"), + "broader": self.namespace + "/" + parentkey, + "label": lic.get("name"), + "identifier": [ + { + "type": "homepage", + "value": str(lic.get("reference")).replace("./", "https://spdx.org/licenses/"), + }, + {"type": "schema", "value": lic.get("detailsUrl")}, + ], + "source": "spdx.org", + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "label": "Licenses", + "description": "", + "source": "f-uji.net", + } + self.vocabdict.update(license_dict) + + def add_metadata_exchange_method(self): + service_dict = { + self.namespace + "/metadata/exchange_service": { + "uri": self.namespace + "/metadata/exchange_service", + "label": "Metadata Exchange Service", + "description": "Standardised services which are used to exchange metadata between machines", + "source": "f-uji.net", + }, + self.namespace + "/metadata/exchange_service/oai_pmh": { + "uri": self.namespace + "/metadata/exchange_service/oai_pmh", + "label": "OAI-PMH", + "broader": self.namespace + "/metadata/exchange_service", + "description": "Open Archives Initiative Protocol for Metadata Harvesting", + "source": "f-uji.net", + }, + self.namespace + "/metadata/exchange_service/ogc_csw": { + "uri": self.namespace + "/metadata/exchange_service/ogc_csw", + "label": "OGC CSW", + "broader": self.namespace + "/metadata/exchange_service", + "description": "Catalogue Service for the Web", + "source": "f-uji.net", + }, + self.namespace + "/metadata/exchange_service/sparql": { + "uri": self.namespace + "/metadata/exchange_service/sparql", + "label": "SPARQL", + "broader": self.namespace + "/metadata/exchange_service", + "description": "SPARQL Protocol and RDF Query Language", + "source": "f-uji.net", 
+ }, + } + self.vocabdict.update(service_dict) + + def add_file_types(self): + parentkey = "data/format" + filedict = {} + with open(os.path.join(self.fuji_data_path, "file_formats.json")) as lcd: + file_format_list = json.load(lcd) + for ffk, ffv in file_format_list.items(): + identifiers = [] + for mime in ffv.get("mime"): + identifiers.append({"type": "mime", "value": mime}) + filedict[self.namespace + "/" + parentkey + "/" + ffk] = { + "uri": self.namespace + "/" + parentkey + "/" + ffk, + "label": ffv.get("name"), + "broader": self.namespace + "/" + parentkey, + "identifier": identifiers, + "source": ffv.get("source"), + "type": ffv.get("reason"), + } + self.vocabdict[self.namespace + "/" + parentkey] = { + "uri": self.namespace + "/" + parentkey, + "broader": self.namespace + "/data", + "label": "Data Format", + "description": "", + "source": "f-uji.net", + } + self.vocabdict.update(filedict) + + +fb = fuji_knowledge_base() +vocabdir = "C:\\xampp\\htdocs\\fuji\\.vocab" +for tk, tv in fb.get_vocab_dict().items(): + print(tk.replace(fb.namespace, "")) + # if tv.get('broader'): + termid = tk.split("/")[-1] + termdir = vocabdir + "/".join(tk.split("/")[:-1]).replace(fb.namespace, "").replace("/", "\\") + print(termdir) + # termdir = vocabdir+tv.get('broader').replace(fb.namespace,'').replace('/','\\') + os.makedirs(termdir, exist_ok=True) + with open(termdir + "\\" + termid + ".json", "w") as termfile: + json.dump(tv, termfile) +# add_metadata_properties('metadata/property') +# fb.add_relation_types() +# print(fuji_vocab_dict) + +# print(add_licenses('metadata/licenses')) +# print(add_semantic_resources('metadata/licenses')) diff --git a/fuji_server/helper/fuji_vocab_helper.py b/fuji_server/helper/fuji_vocab_helper.py new file mode 100644 index 00000000..27e0d125 --- /dev/null +++ b/fuji_server/helper/fuji_vocab_helper.py @@ -0,0 +1,9 @@ +# SPDX-FileCopyrightText: 2020 PANGAEA (https://www.pangaea.de/) +# +# SPDX-License-Identifier: MIT + + +class fuji_term: + 
def __init__(self, term=None): + if term: + self.term = term diff --git a/fuji_server/yaml/metrics_v0.4.yaml b/fuji_server/yaml/metrics_v0.4.yaml index 08693071..98c2c1ca 100644 --- a/fuji_server/yaml/metrics_v0.4.yaml +++ b/fuji_server/yaml/metrics_v0.4.yaml @@ -125,7 +125,7 @@ metrics: evaluation_mechanism: The metric is evaluated using the given metadata standards known to support major search engines such as JSON-LD and Dublin Core. Presence of metadata in research data registries is further evaluated. metric_tests: - metric_test_identifier: FsF-F4-01M-1 - metric_test_name: Metadata is given in a way major search engines can ingest it for their catalogues (JSON-LD, Dublin Core, RDFa) + metric_test_name: Metadata is given in a way major search engines can ingest it for their catalogues (embedded JSON-LD, Dublin Core or RDFa) metric_test_score: 1 metric_test_maturity: 3 - metric_test_identifier: FsF-F4-01M-2 diff --git a/fuji_server/yaml/metrics_v0.6.yaml b/fuji_server/yaml/metrics_v0.6.yaml new file mode 100644 index 00000000..ed4eea80 --- /dev/null +++ b/fuji_server/yaml/metrics_v0.6.yaml @@ -0,0 +1,674 @@ +# LIST OF FAIRSFAIR METRICS AND THEIR RESPONSE OUTPUT FORMATS +config: + metric_specification: https://doi.org/10.5281/zenodo.4081213 + metric_status: draft +metrics: + ## ---------------- FINDABILITY ---------------- ## +- metric_identifier: FsF-F1-01M + metric_number: 1 + metric_short_name: Unique Identifier Metadata + metric_name: Metadata is assigned a globally unique identifier. + description: A globally unique identifier may be assigned to a landing page containing metadata or a metadata file such that it can be referenced unambiguously by humans or machines. Globally unique means an identifier should be associated with only one resource at any time. 
Examples of unique identifiers are Internationalized Resource Identifier (IRI), Uniform Resource Identifier (URI) such as URL and URN, Digital Object Identifier (DOI), the Handle System, identifiers.org, w3id.org and Archival Resource Key (ARK). A data repository may assign a globally unique identifier to your metadata when you publish and make it available through their services. + fair_principle: F1 + target: Metadata or Landingpage + evaluation_mechanism: Identifier is considered unique if it is successfully validated through https://pythonhosted.org/IDUtils/. Supported schemes are ISBN10, ISBN13, ISSN, ISTC, DOI, Handle, EAN8, EAN13, ISNI ORCID, ARK, PURL, LSID, URN, Bibcode, arXiv, PubMed ID, PubMed Central ID, GND. + test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-F1-01M-1 + metric_test_name: Identifier is resolvable and follows a defined unique identifier syntax (IRI, URL) + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + modality: any + comment: identifier can be given as user input + - metric_test_identifier: FsF-F1-01M-2 + metric_test_name: Identifier is not resolvable but follows an UUID or HASH type syntax + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier/unique + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + modality: any + required: + name: + - uuid + - hash + comment: identifier can be given as user input + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-11-25 + version: 0.6 + total_score: 1 + +- metric_identifier: FsF-F1-02M + metric_number: 2 + metric_short_name: Persistent Identifier Metadata + metric_name: Data is assigned a persistent identifier. + description: We make a distinction between the uniqueness and persistence of an identifier. 
An HTTP URL (the address of a given unique resource on the web) is globally unique, but may not be persistent as the URL of data may be not accessible (link rot problem) or the data available under the original URL may be changed (content drift problem). Identifiers based on the Handle System, DOI, ARK are both globally unique and persistent. They are maintained and governed such that they remain stable and resolvable for the long term. The persistent identifier (PID) of a data object may be resolved (point) to a landing page with metadata containing further information on how to access the data content, in some cases a downloadable artefact, or none if the data or repository is no longer maintained. Therefore, ensuring persistence is a shared responsibility between a PID service provider (e.g., datacite) and its clients (e.g., data repositories). For example, the DOI system guarantees the persistence of its identifiers through its social (e.g., policy) and technical infrastructures, whereas a data provider ensures the availability of the resource (e.g., landing page, downloadable artefact) associated with the identifier. + fair_principle: F1 + target: Data + evaluation_mechanism: A persistent identifier is considered to be valid if the given identifier complies with a valid PID syntax. To be valid, the PID further has to be resolvable. 
+ test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-F1-02M-1 + metric_test_name: Identifier follows a defined persistent identifier syntax + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier/persistent + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + modality: any + comment: identifier can be given as user input + - metric_test_identifier: FsF-F1-02M-2 + metric_test_name: Persistent identifier is resolvable + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier/persistent + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + comment: identifier has to resolve to a valid URI + metric_test_score: 0.25 + metric_test_maturity: 2 + - metric_test_identifier: FsF-F1-02M-3 + metric_test_name: Persistent identifier resolves to a page which belongs to the issuer's domain + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier/persistent + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + comment: identifier has to resolve to a domain owned by the issuer, e.g. PIDs listed in a landing page metadata record should point back to that landing page + metric_test_score: 0.25 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-11-25 + version: 0.6 + total_score: 1 + +- metric_identifier: FsF-F1-01DD + metric_number: 1 + metric_short_name: Unique Identifier Data + metric_name: Data is assigned a globally unique identifier. + description: A globally unique identifier may be assigned to data such as a data file such that it can be referenced unambiguously by humans or machines. Globally unique means an identifier should be associated with only one resource at any time. 
Examples of unique identifiers are Internationalized Resource Identifier (IRI), Uniform Resource Identifier (URI) such as URL and URN, Digital Object Identifier (DOI), the Handle System, identifiers.org, w3id.org and Archival Resource Key (ARK). A data repository may assign a globally unique identifier to your data files when you publish and make it available through their services. + fair_principle: F1 + target: Data + evaluation_mechanism: Data identifier is considered unique if it is successfully validated through https://pythonhosted.org/IDUtils/. Supported schemes are ISBN10, ISBN13, ISSN, ISTC, DOI, Handle, EAN8, EAN13, ISNI, ORCID, ARK, PURL, LSID, URN, Bibcode, arXiv, PubMed ID, PubMed Central ID, GND. + test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-F1-01DD-1 + metric_test_name: Data identifier is resolvable and follows a defined unique identifier syntax (IRI, URL) + metric_test_score: 1 + metric_test_maturity: 3 + - metric_test_identifier: FsF-F1-01DD-2 + metric_test_name: Data identifier is not resolvable but follows an UUID or HASH type syntax + metric_test_score: 0.5 + metric_test_maturity: 1 + created_by: FAIRsFAIR + date_created: 2023-05-15 + date_updated: 2023-05-15 + version: 0.6 + total_score: 1 + +- metric_identifier: FsF-F1-02DD + metric_number: 4 + metric_short_name: Persistent Identifier Data + metric_name: Data is assigned a persistent identifier. + description: We make a distinction between data and metadata as well as the uniqueness and persistence of an identifier as explained in FsF-F1-02M. The persistent identifier (PID) of a data may point to a file or streaming object or a service providing that data. + fair_principle: F1 + target: Data + evaluation_mechanism: A persistent identifier is considered to be valid if the given identifier complies with a valid PID syntax. To be valid, the PID further has to be resolvable. 
+ test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-F1-02DD-1 + metric_test_name: A data identifier follows a defined persistent identifier syntax + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: https://f-uji.net/vocab/identifier/persistent + tested_on: https://f-uji.net/vocab/metadata/property/object_identifier + modality: any + - metric_test_identifier: FsF-F1-02DD-2 + metric_test_name: A persistent identifier of data is resolvable + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2023-05-15 + date_updated: 2023-05-15 + version: 0.6 + total_score: 1 + +- metric_identifier: FsF-F2-01M + metric_number: 3 + metric_short_name: Descriptive Core Metadata + metric_name: Metadata includes descriptive core elements (creator, title, data identifier, publisher, publication date, summary and keywords) to support data findability. + description: Metadata is descriptive information about a data object. Since the metadata required differs depending on the users and their applications, this metric focuses on core metadata. The core metadata is the minimum descriptive information required to enable data finding, including citation which makes it easier to find data. We determine the required metadata based on common data citation guidelines (e.g., DataCite, ESIP, and IASSIST), and metadata recommendations for data discovery (e.g., EOSC Datasets Minimum Information (EDMI), DataCite Metadata Schema, W3C Recommendation Data on the Web Best Practices and Data Catalog Vocabulary). This metric focuses on domain-agnostic core metadata. Domain or discipline-specific metadata specifications are covered under metric FsF-R1.3-01M. A repository should adopt a schema that includes properties of core metadata, whereas data authors should take the responsibility of providing core metadata. 
+ fair_principle: F2 + target: Metadata + evaluation_mechanism: Metadata can be offered in different ways. here we focus on common web based strategies. These include 1) embedding metadata within the landing page such as JSON-LD, OpenGraph, Microdata, Dublin Core, 2) offering typed links which lead to metadata within the HTML code of the metadata or signposting links. 3) enable content negotiation and deliver e.g. RDF, JSON-LD or XML on demand. The metric evaluates the completeness of metadata in case metadata has been retrieved. + test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-F2-01M-1 + metric_test_name: Metadata has been made available via common web methods + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: https://f-uji.net/vocab/metadata/offering_method + modality: any + - metric_test_identifier: FsF-F2-01M-2 + metric_test_name: Core data citation metadata is available + metric_test_score: 0.5 + metric_test_maturity: 2 + metric_test_requirements: + - target: https://f-uji.net/vocab/metadata/property + modality: all + tested_on: https://f-uji.net/vocab/metadata/property + required: + name: + - creator + - title + - object_identifier + - publication_date + - publisher + - object_type + - metric_test_identifier: FsF-F2-01M-3 + metric_test_name: Core descriptive metadata is available + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: https://f-uji.net/vocab/metadata/property + modality: all + tested_on: https://f-uji.net/vocab/metadata/property + required: + name: + - creator + - title + - object_identifier + - publication_date + - publisher + - object_type + - summary + - keywords + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2022-05-30 + version: 0.5 + total_score: 2 + +- metric_identifier: FsF-F3-01M + metric_number: 4 + metric_short_name: Inclusion of Data Identifier in Metadata + metric_name: Metadata includes the identifier 
of the data it describes. + description: The metadata should explicitly specify the identifier of the data such that users can discover and access the data through the metadata. If the identifier specified is persistent and points to a landing page, the data identifier and links to download the data content should be taken into account in the assessment. + fair_principle: F3 + target: Metadata + evaluation_mechanism: Several metadata standards provide the possibility to include links to the actual data content. The presence of such links is evaluated here. + test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-F3-01M-1 + metric_test_name: Metadata contains data content related information (file name, size, type) + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: https://f-uji.net/vocab/data/property + tested_on: https://f-uji.net/vocab/metadata/property/object_content_identifier + modality: all + required: + - type + - size + - metric_test_identifier: FsF-F3-01M-2 + metric_test_name: Metadata contains a PID or URL which indicates the location of the downloadable data content + metric_test_score: 0.5 + metric_test_maturity: 3 + metric_test_requirements: + - target: https://f-uji.net/vocab/data/property/url + tested_on: https://f-uji.net/vocab/metadata/property/object_content_identifier + modality: any + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2022-05-30 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-F4-01M + metric_number: 5 + metric_short_name: Searchable Metadata + metric_name: Metadata is offered in such a way that it can be retrieved programmatically. + description: This metric refers to ways through which the metadata of data is exposed or provided in a standard and machine-readable format. Assessing this metric will require an understanding of the capabilities offered by the data repository used to host the data. 
Metadata may be available through multiple endpoints. For example, if data is hosted by a repository, the repository may disseminate its metadata through a metadata harvesting protocol (e.g., via OAI-PMH) and/or a web service. Metadata may also be embedded as structured data on a data page for use by web search engines such as Google and Bing or be available as linked (open) data. + fair_principle: F4 + target: Metadata + evaluation_mechanism: The metric is evaluated using the given metadata standards known to support major search engines such as JSON-LD and Dublin Core. Presence of metadata in research data registries is further evaluated. + test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-F4-01M-1 + metric_test_name: Metadata is given in a way major search engines can ingest it for their catalogues (embedded JSON-LD, Dublin Core or RDFa) + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: http://f-uji.net/vocab/metadata/standard + modality: any + required: + name: + - dublin-core + - schemaorg + - dcat-data-catalog-vocabulary + - target: http://f-uji.net/vocab/metadata/offering_method + modality: any + required: + name: + - rdfa + - microdata + - meta_tag + - json_in_html + - metric_test_identifier: FsF-F4-01M-2 + metric_test_name: Metadata is registered in major research data registries (DataCite) + metric_test_score: 1 + metric_test_maturity: 2 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2022-05-30 + version: 0.5 + total_score: 2 + +- metric_identifier: FsF-A1-01M + metric_number: 6 + metric_short_name: Data Access Information + metric_name: Metadata contains access level and access conditions of the data. + description: This metric determines if the metadata includes the level of access to the data such as public, embargoed, restricted, or metadata-only access and its access conditions. 
Both access level and conditions are necessary information to potentially gain access to the data. It is recommended that data should be as open as possible and as closed as necessary. There are no access conditions for public data. Datasets should be released into the public domain (e.g., with an appropriate public-domain-equivalent license such as Creative Commons CC0 licence) and openly accessible without restrictions when possible. Embargoed access refers to data that will be made publicly accessible at a specific date which should be specified in the metadata. For example, a data author may release their data after having published their findings from the data. Therefore, access conditions such as the date the data will be released publicly are essential. Restricted access refers to data that can be accessed under certain conditions (e.g. because of commercial, sensitive, or other confidentiality reasons or the data is only accessible via a subscription or a fee). Restricted data may be available to a particular group of users or after permission is granted. For restricted data, the metadata should include the conditions of access to the data such as point of contact or instructions to access the data. Metadata-only access refers to data that is not made publicly available and for which only metadata is publicly available. + fair_principle: A1 + target: Metadata + evaluation_mechanism: Metric evaluation is based on the presence of access information in an appropriate metadata element/field.
+ test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-A1-01M-1 + metric_test_name: Information about access restrictions or rights can be identified in metadata + metric_test_score: 0.5 + metric_test_maturity: 1 + metric_test_requirements: + - target: http://f-uji.net/vocab/metadata/property/access_level + modality: any + - metric_test_identifier: FsF-A1-01M-3 + metric_test_name: Data access information is indicated by (not machine readable) standard terms + metric_test_score: 1 + metric_test_maturity: 2 + metric_test_requirements: + - target: http://f-uji.net/vocab/access_condition + modality: any + tested_on: http://f-uji.net/vocab/metadata/property/access_level + comment: label and id + - metric_test_identifier: FsF-A1-01M-2 + metric_test_name: Data access information is machine readable + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: http://f-uji.net/vocab/access_condition + modality: any + tested_on: http://f-uji.net/vocab/metadata/property/access_level + comment: identifier (namespace) + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-12-03 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-A1-03D + metric_number: 8 + metric_short_name: Standardized Communication Protocol of Data + metric_name: Data is accessible through a standardized communication protocol. + description: Given an identifier of a dataset, the dataset should be retrievable using a standard communication protocol such as HTTP, HTTPS, FTP, TFTP, SFTP, FTAM and AtomPub. Avoid disseminating data using a proprietary protocol. + fair_principle: A1 + target: Data + evaluation_mechanism: The data link which is given in the metadata is tested for a standard communication protocol + test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-A1-03D-1 + metric_test_name: Metadata includes a resolvable link to data based on standardized web communication protocols.
+ metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-10-23 + date_updated: 2020-12-05 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-A1-02M + metric_number: 7 + metric_short_name: Standardized Communication Protocol of Metadata + metric_name: Metadata is accessible through a standardized communication protocol. + description: Given an identifier of a dataset, the metadata of the dataset should be retrievable using a standard communication protocol such as HTTP, HTTPS, FTP, TFTP, SFTP, FTAM and AtomPub. Avoid disseminating data using a proprietary protocol. + fair_principle: A1 + target: Metadata + evaluation_mechanism: The URI scheme of the landing page is tested for a standard communication protocol + test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-A1-02M-1 + metric_test_name: Landing page link is based on standardized web communication protocols. + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-10-23 + date_updated: 2020-12-05 + version: 0.5 + total_score: 1 + +#- metric_identifier: FsF-A2-01M +# metric_number: 9 +# metric_short_name: Metadata Preservation +# metric_name: Metadata remains available, even if the data is no longer available. +# description: This metric determines if the metadata will be preserved even when the data they represent are no longer available, replaced or lost. +# fair_principle: A2 +# target: Metadata +# evaluation_mechanism: Currently this metric can only be assessed using the persistent identifier as an indicator. DOI metadata is preserved by DataCite. 
+# metric_tests: +# - metric_test_identifier: FsF-A2-01M-1 +# metric_test_name: The persistent identifier system used guarantees the preservation of associated metadata +# metric_test_score: 1 +# metric_test_maturity: 3 +# created_by: FAIRsFAIR +# date_created: 2020-07-08 +# date_updated: 2020-12-05 +# version: 0.5 +# total_score: 1 + +- metric_identifier: FsF-I1-01M + metric_number: 10 + metric_short_name: Formal Representation of Metadata + metric_name: Metadata is represented using a formal knowledge representation language. + description: Knowledge representation is vital for machine-processing of the knowledge of a domain. Expressing the metadata of a data object using a formal knowledge representation will enable machines to process it in a meaningful way and enable more data exchange possibilities. Examples of knowledge representation languages are RDF, RDFS, and OWL. These languages may be serialized (written) in different formats. For instance, RDF/XML, RDFa, Notation3, Turtle, N-Triples and N-Quads, and JSON-LD are RDF serialization formats. + fair_principle: I1 + target: Metadata + evaluation_mechanism: Metadata has to be serialised in a common formal knowledge representation language. 
+ test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-I1-01M-1 + metric_test_name: Parsable, structured metadata (JSON-LD, RDFa) is embedded in the landing page XHTML/HTML code + metric_test_score: 1 + metric_test_maturity: 2 + metric_test_requirements: + - target: http://f-uji.net/vocab/metadata/format + modality: any + required: + name: + - RDF + - JSON-LD + - RDFa + - target: http://f-uji.net/vocab/metadata/offering_method + modality: any + required: + name: + - meta_tag + - microdata + - rdfa + - json_in_html + - metric_test_identifier: FsF-I1-01M-2 + metric_test_name: Parsable, graph data (RDF, JSON-LD) is accessible through content negotiation, typed links or sparql endpoint + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: http://f-uji.net/vocab/metadata/format + modality: any + required: + name: + - RDF + - JSON-LD + - RDFa + - target: http://f-uji.net/vocab/metadata/offering_method + modality: any + required: + name: + - content_negotiation + - target: http://f-uji.net/vocab/metadata/exchange_service + modality: any + required: + name: + - sparql + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2023-06-01 + version: 0.5 + total_score: 2 + +- metric_identifier: FsF-I2-01M + metric_number: 11 + metric_short_name: Metadata with Semantic Resources + metric_name: Metadata uses semantic resources + description: A metadata document or selected parts of the document may incorporate additional terms from semantic resources (also referred as semantic artefacts) so that the contents are unambiguous and can be processed automatically by machines. This enrichment facilitates enhanced data search and interoperability of data from different sources. Ontology, thesaurus, and taxonomy are kinds of semantic resources, and they come with varying degrees of expressiveness and computational complexity. 
Knowledge organization schemes such as thesaurus and taxonomy are semantically less formal than ontologies. + fair_principle: I2 + target: Metadata + evaluation_mechanism: Used namespaces are identified in given graph or XML metadata and verified using a controlled list. + test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-I2-01M-1 + metric_test_name: Vocabulary namespace URIs can be identified in metadata + metric_test_score: 0 + metric_test_maturity: 1 + metric_test_requirements: + - comment: The sheer existence of namespaces declared in XML or RDF files is checked here. This test is not scored + - metric_test_identifier: FsF-I2-01M-2 + metric_test_name: Namespaces of known semantic resources can be identified in metadata + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - target: http://f-uji.net/vocab/semantic_resource + modality: any + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-12-03 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-I3-01M + metric_number: 12 + metric_short_name: Links to related entities + metric_name: Metadata includes links between the data and its related entities. + description: Linking data to its related entities will increase its potential for reuse. The linking information should be captured as part of the metadata. A dataset may be linked to its prior version, related datasets or resources (e.g. publication, physical sample, funder, repository, platform, site, or observing network registries). Links between data and its related entities should be expressed through relation types (e.g., DataCite Metadata Schema specifies relation types between research objects through the fields ‘RelatedIdentifier’ and ‘RelationType’), and preferably use persistent Identifiers for related entities (e.g., ORCID for contributors, DOI for publications, and ROR for institutions). 
+ fair_principle: I3 + target: Metadata + evaluation_mechanism: Metadata is checked for existing relations to related entities which can be e.g. citations or other related resources + metric_tests: + - metric_test_identifier: FsF-I3-01M-1 + metric_test_name: Related resources are explicitly mentioned in metadata + metric_test_score: 1 + metric_test_maturity: 2 + metric_test_requirements: + - target: http://f-uji.net/vocab/relation_type + modality: any + tested_on: http://f-uji.net/vocab/metadata/property/related_resources + comment: The presence of a (typed, default = related) related resource is checked, can be a string or URI + - metric_test_identifier: FsF-I3-01M-2 + metric_test_name: Related resources are indicated by machine readable links or identifiers + metric_test_requirements: + - comment: same as above but relations have to be machine readable/actionable + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-12-03 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-R1-01MD + metric_number: 13 + metric_short_name: Metadata of Data Content + metric_name: Metadata specifies the content of the data. + description: This metric evaluates if a description (properties) of the content of the data is specified in the metadata. The description should be an accurate reflection of the actual data deposited. Data content descriptors include but are not limited to resource type (e.g., data or a collection of data), variable(s) measured or observed, method, data format and size. Ideally, ontological vocabularies should be used to describe data content to support interdisciplinary reuse. + fair_principle: R1 + target: Metadata, Data + evaluation_mechanism: Metric is evaluated using the resource type given in the metadata as well as data object specific properties file size and file type. Further presence of measured variables is tested. 
+ test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-R1-01MD-1_ss + metric_test_name: Minimal information about available data content is given in metadata + metric_test_score: 1 + metric_test_maturity: 1 + - metric_test_identifier: FsF-R1-01MD-1a + metric_test_name: Resource type (e.g. dataset) is given in metadata + metric_test_score: 0 + - metric_test_identifier: FsF-R1-01MD-1b + metric_test_name: Information about data content (e.g. links) is given in metadata + metric_test_score: 0 + - metric_test_identifier: FsF-R1-01MD-2 + metric_test_name: Verifiable data descriptors (file info, measured variables or observation types) are specified in metadata + metric_test_score: 1 + metric_test_maturity: 2 + - metric_test_identifier: FsF-R1-01MD-2a + metric_test_name: File size and type information are specified in metadata + metric_test_score: 0 + - metric_test_identifier: FsF-R1-01MD-2b + metric_test_name: Measured variables or observation types are specified in metadata + metric_test_score: 0 + - metric_test_identifier: FsF-R1-01MD-3 + metric_test_name: Data content matches file type and size specified in metadata + metric_test_score: 1 + metric_test_maturity: 3 + - metric_test_identifier: FsF-R1-01MD-4 + metric_test_name: Data content matches measured variables or observation types specified in metadata + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-07-08 + version: 0.5 + total_score: 4 + +- metric_identifier: FsF-R1.1-01M + metric_number: 14 + metric_short_name: Data Usage License + metric_name: Metadata includes license information under which data can be reused. + description: This metric evaluates if data is associated with a license because otherwise users cannot reuse it in a clear legal context. We encourage the application of licenses for all kinds of data whether public, restricted or for specific users. 
Without an explicit license, users do not have a clear idea of what can be done with your data. Licenses can be of standard type (Creative Commons, Open Data Commons Open Database License) or bespoke licenses, and rights statements which indicate the conditions under which data can be reused. It is highly recommended to use a standard, machine-readable license such that it can be interpreted by machines and humans. In order to inform users about what rights they have to use a dataset, the license information should be specified as part of the dataset’s metadata. + fair_principle: R1.1 + target: Metadata + evaluation_mechanism: Metric evaluation is based on the presence of a machine readable license information in an appropriate metadata element/field. + test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-R1.1-01M-1 + metric_test_name: Licence information is given in an appropriate metadata element + metric_test_score: 1 + metric_test_maturity: 1 + - metric_test_identifier: FsF-R1.1-01M-2 + metric_test_name: Recognized licence is valid (community specific or registered at SPDX) + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2023-06-02 + version: 0.5 + total_score: 2 + +- metric_identifier: FsF-R1.2-01M + metric_number: 15 + metric_short_name: Data Provenance + metric_name: Metadata includes provenance information about data creation or generation. + description: >- + Data provenance (also known as lineage) represents a dataset’s history, including the people, entities, and processes involved in its creation, management and longer-term curation. It is essential to provide provenance information about your data to provide valuable context and to enable informed use and reuse. The levels of provenance information needed can vary depending on the data type (e.g., measurement, observation, derived data, or data product) and research domains. 
For that reason, it is difficult to define a set of finite provenance properties that will be adequate for all domains. Based on existing work, we suggest that the following provenance properties of data generation or collection are included in the metadata record as a minimum. + (a) Sources of data, e.g., datasets the data is derived from and instruments + (b) Data creation or collection date + (c) Contributors involved in data creation and their roles + (d) Data publication, modification and versioning information + There are various ways through which provenance information may be included in a metadata record. Some of the provenance properties (e.g., instrument, contributor) may be best represented using PIDs (such as DOIs for data, ORCIDs for researchers). + This way, humans and systems can retrieve more information about each of the properties by resolving the PIDs. Alternatively, the provenance information can be given in a linked provenance record expressed explicitly in e.g., PROV-O or PAV or Vocabulary of Interlinked Datasets (VoID). + fair_principle: R1.2 + target: Metadata + evaluation_mechanism: Metrics are assessed using provenance related information contained in metadata which can either be specific elements which can be mapped e.g. to PROV-O or the use of provenance related namespaces and associated terms. 
+ test_scoring_mechanism: cumulative + metric_tests: + - metric_test_identifier: FsF-R1.2-01M-1 + metric_test_name: Metadata contains elements which hold provenance information and can be mapped to PROV + metric_test_score: 1 + metric_test_maturity: 2 + - metric_test_identifier: FsF-R1.2-01M-2 + metric_test_name: Metadata contains provenance information using formal provenance ontologies (PROV-O) + metric_test_score: 1 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2023-06-01 + version: 0.5 + total_score: 2 + +- metric_identifier: FsF-R1.3-01M + metric_number: 16 + metric_short_name: Community-Endorsed Metadata Standard + metric_name: Metadata follows a standard recommended by the target research community of the data. + description: In addition to core metadata required to support data discovery (covered under metric FsF-F2-01M), metadata to support data reusability should be made available following community-endorsed metadata standards. Some communities have well-established metadata standards (e.g., geospatial [ISO19115], biodiversity [DarwinCore, ABCD, EML], social science [DDI], astronomy [International Virtual Observatory Alliance Technical Specifications]) while others have limited standards or standards that are under development (e.g., engineering and linguistics). The use of community-endorsed metadata standards is usually encouraged and supported by domain and discipline-specific repositories. 
+ fair_principle: R1.3 + target: Metadata + evaluation_mechanism: Metadata encodings can be verified using community specific namespaces and schemas listed by the RDA metadata standards WG or fairsharing.org + test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-R1.3-01M-1 + metric_test_name: Community specific metadata standard is detected using namespaces or schemas found in provided metadata or metadata services outputs + metric_test_score: 1 + metric_test_maturity: 3 + metric_test_requirements: + - modality: any except + target: https://f-uji.net/vocab/metadata/standards + required: + field_of_science: + - science + - generic + comment: test performed on namespaces or schemas found in exposed metadata + - metric_test_identifier: FsF-R1.3-01M-2 + metric_test_name: Community specific metadata standard is listed in the re3data record of the responsible repository + metric_test_score: 1 + metric_test_maturity: 2 + metric_test_requirements: + - modality: any except + target: https://f-uji.net/vocab/metadata/standards + required: + field_of_science: + - science + - generic + comment: test is performed using information collected from re3data + - metric_test_identifier: FsF-R1.3-01M-3 + metric_test_name: Multidisciplinary but community endorsed metadata (RDA Metadata Standards Catalog, fairsharing) standard is listed in the re3data record or detected by namespace + metric_test_score: 1 + metric_test_maturity: 1 + metric_test_requirements: + - modality: any + target: https://f-uji.net/vocab/metadata/standards + required: + field_of_science: + - science + - generic + source: + - rd-alliance.org + - fairsharing.org + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-12-03 + version: 0.5 + total_score: 1 + +- metric_identifier: FsF-R1.3-02D + metric_number: 17 + metric_short_name: Data File format + metric_name: Data is available in a file format recommended by the target research community. 
+ description: >- + File formats refer to methods for encoding digital information. For example, CSV for tabular data, NetCDF for multidimensional data and GeoTIFF for raster imagery. Data should be made available in a file format that is backed by the research community to enable data sharing and reuse. Consider for example, file formats that are widely used and supported by the most commonly used software and tools. These formats also should be suitable for long-term storage and archiving, which are usually recommended by a data repository. The formats not only give a higher certainty that your data can be read in the future, but they will also help to increase the reusability and interoperability. Using community-endorsed formats enables data to be loaded directly into the software and tools used for data analysis. It makes it possible to easily integrate your data with other data using the same preferred format. The use of preferred formats will also help to transform the format to a newer one, in case a preferred format gets outdated. + Similar to metric FsF-F4-01M, answering this metric will require an understanding of the capabilities offered, data preservation plan and policies implemented by the data repository and data services (e.g., Datacite PID service). + Continued access to metadata depends on a data repository’s preservation practice which is usually documented in the repository’s service policies or statements. + A trustworthy data repository offering DOIs and implementing a PID Policy should guarantee that metadata will remain accessible even when data is no longer available for any reason (e.g., by providing a tombstone page). + fair_principle: R1.3 + target: Data + evaluation_mechanism: Data file format given in metadata is compared to a controlled list of known scientific formats. 
+ test_scoring_mechanism: alternative + metric_tests: + - metric_test_identifier: FsF-R1.3-02D-1 + metric_test_name: The format of a data file given in the metadata is listed in the long term file formats, open file formats or scientific file formats controlled list + metric_test_score: 1 + - metric_test_identifier: FsF-R1.3-02D-1a + metric_test_name: The format of the data file is an open format + metric_test_score: 0 + metric_test_maturity: 1 + - metric_test_identifier: FsF-R1.3-02D-1b + metric_test_name: The format of the data file is a long term format + metric_test_score: 0 + metric_test_maturity: 2 + - metric_test_identifier: FsF-R1.3-02D-1c + metric_test_name: The format of the data file is a scientific format + metric_test_score: 0 + metric_test_maturity: 3 + created_by: FAIRsFAIR + date_created: 2020-07-08 + date_updated: 2020-12-03 + version: 0.5 + total_score: 1 + metric_specification: 10.5281/zenodo.6461229