diff --git a/.github/workflows/reports.yml b/.github/workflows/reports.yml index 7199aba7..2a84c196 100644 --- a/.github/workflows/reports.yml +++ b/.github/workflows/reports.yml @@ -20,12 +20,12 @@ jobs: actions: read steps: - name: Download and Extract Artifacts - uses: dawidd6/action-download-artifact@e7466d1a7587ed14867642c2ca74b5bcc1e19a2d # v3.0.0 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3.1.4 with: run_id: ${{ github.event.workflow_run.id }} path: artifacts - name: Publish Test Results - uses: EnricoMi/publish-unit-test-result-action@e780361cd1fc1b1a170624547b3ffda64787d365 # v2.12.0 + uses: EnricoMi/publish-unit-test-result-action@30eadd5010312f995f0d3b3cff7fe2984f69409e # v2.16.1 with: comment_title: ':clipboard: Pytest Results' commit: ${{ github.event.workflow_run.head_sha }} @@ -43,7 +43,7 @@ jobs: actions: read steps: - name: Download and Extract Artifacts - uses: dawidd6/action-download-artifact@e7466d1a7587ed14867642c2ca74b5bcc1e19a2d # v3.0.0 + uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3.1.4 with: run_id: ${{ github.event.workflow_run.id }} path: artifacts diff --git a/README.md b/README.md index 61359afa..6cccb572 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Thanks to [Heinz-Alexander Fuetterer](https://github.com/afuetterer) for his con [![Coverage](https://pangaea-data-publisher.github.io/fuji/coverage/coveragebadge.svg)](https://pangaea-data-publisher.github.io/fuji/coverage/) [![Publish Docker image](https://github.com/pangaea-data-publisher/fuji/actions/workflows/publish-docker.yml/badge.svg)](https://github.com/pangaea-data-publisher/fuji/actions/workflows/publish-docker.yml) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4063720.svg)](https://doi.org/10.5281/zenodo.4063720) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11084909.svg)](https://doi.org/10.5281/zenodo.11084909) ## Overview @@ -233,6 +233,11 @@ The multiple test methods 
within an evaluator also check whether their specific For each metric, the maturity is determined as the maximum of the maturity associated with each passed test. This means that if a test indicating maturity 3 is passed and one indicating maturity 2 is not passed, the metric will still be shown to be fulfilled with maturity 3. +### Community specific metrics + +Some, not all, metrics can be configured using the following guidelines: +[Metrics configuration guide](https://github.com/pangaea-data-publisher/fuji/blob/master/metrics_configuration.md) + ### Updates to the API Making changes to the API requires re-generating parts of the code using Swagger. diff --git a/fuji_server/evaluators/fair_evaluator_license.py b/fuji_server/evaluators/fair_evaluator_license.py index 24b90f5f..a2db9ece 100644 --- a/fuji_server/evaluators/fair_evaluator_license.py +++ b/fuji_server/evaluators/fair_evaluator_license.py @@ -50,6 +50,7 @@ def setLicenseDataAndOutput(self): specified_licenses = [specified_licenses] if specified_licenses is not None and specified_licenses != []: for license in specified_licenses: + # print(license) isurl = False licence_valid = False license_output = LicenseOutputInner() @@ -58,8 +59,12 @@ def setLicenseDataAndOutput(self): if isurl: iscc, generic_cc = self.isCreativeCommonsLicense(license, self.metric_identifier) if iscc: - license = generic_cc - spdx_uri, spdx_osi, spdx_id = self.lookup_license_by_url(license, self.metric_identifier) + spdx_osi = True + spdx_uri = license + spdx_id = generic_cc + # license = generic_cc + else: + spdx_uri, spdx_osi, spdx_id = self.lookup_license_by_url(license, self.metric_identifier) else: # maybe licence name spdx_uri, spdx_osi, spdx_id = self.lookup_license_by_name(license, self.metric_identifier) license_output.license = license diff --git a/fuji_server/evaluators/fair_evaluator_searchable.py b/fuji_server/evaluators/fair_evaluator_searchable.py index f554b908..944f54b9 100644 --- 
a/fuji_server/evaluators/fair_evaluator_searchable.py +++ b/fuji_server/evaluators/fair_evaluator_searchable.py @@ -153,6 +153,11 @@ def testSearchEngineCompatibleMetadataAvailable(self): search_engine_support_match.append( standard_found + " via: " + found_metadata.get("offering_method") ) + else: + self.logger.info( + self.metric_identifier + + "Found RDFa like metadata which however is empty thus useless for search engines" + ) search_engine_support_match = list(set(search_engine_support_match)) # OLD WAY # Check search mechanisms based on sources of metadata extracted. diff --git a/fuji_server/helper/metadata_collector_rdf.py b/fuji_server/helper/metadata_collector_rdf.py index 6c00ec80..71bb6ed3 100644 --- a/fuji_server/helper/metadata_collector_rdf.py +++ b/fuji_server/helper/metadata_collector_rdf.py @@ -1015,7 +1015,7 @@ def get_dcat_metadata(self, graph): dcat_metadata = self.get_metadata(graph, datasets[0], type="Dataset") # distribution distribution = graph.objects(datasets[0], DCAT.distribution) - + # do something (check for table headers) with the table here.. for t in table: print(t) dcat_metadata["object_content_identifier"] = [] @@ -1057,7 +1057,7 @@ def get_dcat_metadata(self, graph): dist, DCTERMS.rights ) dtype = graph.value(dist, DCAT.mediaType) - dsize = graph.value(dist, DCAT.bytesSize) + dsize = graph.value(dist, DCAT.byteSize) if durl or dtype or dsize: if idutils.is_url(str(durl)): dtype = "/".join(str(dtype).split("/")[-2:]) diff --git a/metrics_configuration.md b/metrics_configuration.md new file mode 100644 index 00000000..0ac6d5f9 --- /dev/null +++ b/metrics_configuration.md @@ -0,0 +1,51 @@ +# F-UJI configuration options + +Since version 3.0.0 F-UJI offers a variety of configuration options which allow the use of user-defined metrics and restriction of metadata harvesting methods. + +## Metric YAML + +You can define your own metric definitions in a dedicated YAML file. 
Metrics YAML files have to comply with the following conventions: + +* Files need to be located in folder 'yaml' +* File names have to follow this syntax: metrics_[version][community_code].yaml + where [version] has to be a number, which can optionally have one decimal point. + +By now, user-defined metrics have to be based on metrics file 'metrics_0.6.yaml' which should be used as a template. + +Copy the YAML content of this metric file to a new metric file and save the file following the syntax mentioned above for the file name of the new metrics e.g. metrics_0.6new.yaml. + +To define your own metrics you can restrict the number of metrics and add configuration options to a limited number of existing metrics. + +### Configure metrics and tests to be performed + +To restrict metrics choose those you want to use from the 0.6 list of metrics and tests and simply delete tests or metrics which you do not wish to be performed during your assessments. + +### Configure individual metrics tests + +For all metrics and tests you can change the YAML properties *metric_short_name*, *metric_name* and *description* according to your needs. + +For some tests you can define additional parameters. For example, one can specify exactly which metadata elements, licenses, metadata standards or vocabularies are expected. + +Generally, these specifications are defined using the YAML property *community_requirements* which has to be a dictionary containing the subproperties *target*, *modality*, and *required*. + +* *target* defines the test targets, defined in the F-UJI ontology, such as licenses, metadata properties etc. which is represented by a controlled list of values which is used for tests by default. +* *required* has to be a list which defines the necessary property values +* *modality* defines if *all* or *any* of *required* values need to be present to pass the test. 
+*match* specifies how matching values are identified: *wildcard* for wildcard-like match rules like 'test*'; *full* when a full match is required. +*target_property* additionally defines the property of the *target* object in which matches are searched for, by default the property *name* or *label* is used for this purpose. +*modality* and *match* are currently not yet implemented, thus still hardcoded, but may be implemented in future versions. + +## Selecting a metric within an API call + +Within the POST data you need to specify the metric which has to be used. To do this, use the *metric_version* argument: +~~~ +{ + "object_identifier": "https://doi.org/10.1594/PANGAEA.908011", + "test_debug": true, + "metadata_service_endpoint": "http://ws.pangaea.de/oai/provider", + "metadata_service_type": "oai_pmh", + "use_datacite": true, + "use_github": false, + "metric_version": "metrics_v0.5" +} +~~~ diff --git a/pyproject.toml b/pyproject.toml index 106b37bf..bff44a7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "hashid~=3.1.4", "idutils~=1.2", "jmespath~=1.0", - "levenshtein~=0.24.0", + "levenshtein~=0.25.0", "lxml==5.1.0", "pandas~=2.1", "pyRdfa3~=3.5", @@ -83,7 +83,7 @@ report = [ testing = [ "genbadge[coverage]~=1.1", "pytest~=8.0", - "pytest-cov~=4.1", + "pytest-cov~=5.0", "pytest-randomly~=3.15", "pytest-recording~=0.13", "pytest-xdist~=3.3"