
Commit

Merge remote-tracking branch 'origin/master'
huberrob committed Dec 8, 2023
2 parents 1d0766c + 3a9dd19 commit a91e5cc
Showing 80 changed files with 445 additions and 436 deletions.
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml

@@ -1,6 +1,6 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.4.0
+  rev: v4.5.0
   hooks:
   - id: end-of-file-fixer
   - id: mixed-line-ending
@@ -14,7 +14,7 @@ repos:
   - id: file-contents-sorter
     files: ^fuji_server\/data\/.*.txt$
 - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
-  rev: v2.10.0
+  rev: v2.11.0
   hooks:
   - id: pretty-format-ini
     args: [--autofix]
@@ -23,11 +23,11 @@ repos:
   - id: pretty-format-yaml
     args: [--autofix, --indent, '2']
 - repo: https://github.com/psf/black
-  rev: 23.7.0
+  rev: 23.11.0
   hooks:
   - id: black
-- repo: https://github.com/charliermarsh/ruff-pre-commit
-  rev: v0.0.287
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  rev: v0.1.6
   hooks:
   - id: ruff
     args: [--fix, --exit-non-zero-on-fix]
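
These four version bumps match what a routine `pre-commit autoupdate` run produces: each hook's `rev` is rewritten to the repository's latest tag. The ruff hook also changes its URL because ruff-pre-commit moved from the personal `charliermarsh` account to the `astral-sh` organization; the old address still redirects, but the new one is the canonical home.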
2 changes: 1 addition & 1 deletion Dockerfile

@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1.5
-FROM python:3.12-slim
+FROM python:3.11-slim
 
 WORKDIR /usr/src/app
 
2 changes: 2 additions & 0 deletions README.md

@@ -1,6 +1,8 @@
 # F-UJI (FAIRsFAIR Research Data Object Assessment Service)
 Developers: [Robert Huber](mailto:[email protected]), [Anusuriya Devaraju](mailto:[email protected])
 
+Thanks to [Heinz-Alexander Fuetterer](https://github.com/afuetterer) for his contributions and his help in cleaning up the code.
+
 [![CI](https://github.com/pangaea-data-publisher/fuji/actions/workflows/ci.yml/badge.svg)](https://github.com/pangaea-data-publisher/fuji/actions/workflows/ci.yml)
 [![Publish Docker image](https://github.com/pangaea-data-publisher/fuji/actions/workflows/publish-docker.yml/badge.svg)](https://github.com/pangaea-data-publisher/fuji/actions/workflows/publish-docker.yml)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4063720.svg)](https://doi.org/10.5281/zenodo.4063720)
4 changes: 2 additions & 2 deletions fuji_server/controllers/fair_metric_controller.py

@@ -27,7 +27,7 @@
 from fuji_server.helper.metric_helper import MetricHelper
 
 
-def get_metrics(version):  # noqa: E501
+def get_metrics(version):
     """Return all metrics and their definitions.
     :rtype: Metrics
     """
@@ -40,7 +40,7 @@ def get_metrics(version):  # noqa: E501
     return response, 404
 
 
-def get_metric(version, metric):  # noqa: E501
+def get_metric(version, metric):
     """Return all metrics and their definitions.
     :rtype: Metrics
     """
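
`# noqa: E501` suppresses the "line too long" lint check for a single source line. The swagger-codegen templates attach it to every generated signature whether needed or not; with black and ruff now enforcing line length project-wide, the suppressions on short lines do nothing, and ruff's RUF100 rule can flag such unused directives. A minimal sketch of the pattern (the function name is just a placeholder):

    # Before: blanket suppression emitted by the code generator.
    def get_metrics(version):  # noqa: E501
        ...

    # After: the line is well under the length limit, so the directive
    # suppressed nothing and can be dropped.
    def get_metrics(version):
        ...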
6 changes: 3 additions & 3 deletions fuji_server/controllers/fair_object_controller.py

@@ -29,11 +29,11 @@
 from fuji_server.controllers.fair_check import FAIRCheck
 from fuji_server.helper.identifier_helper import IdentifierHelper
 from fuji_server.helper.preprocessor import Preprocessor
-from fuji_server.models.body import Body  # noqa: E501
-from fuji_server.models.fair_results import FAIRResults  # noqa: E501
+from fuji_server.models.body import Body
+from fuji_server.models.fair_results import FAIRResults
 
 
-def assess_by_id(body):  # noqa: E501
+def assess_by_id(body):
     """assess_by_id
     Evaluate FAIRness of a data object based on its identifier # noqa: E501
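
Note that the `# noqa: E501` fragments inside the generated docstrings (here and in the other controllers) are left in place: they are part of the docstring text that swagger-codegen emits, so this cleanup only strips the trailing comments on code lines and leaves the documented strings untouched.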
10 changes: 5 additions & 5 deletions fuji_server/controllers/harvest_controller.py

@@ -26,12 +26,12 @@
 import connexion
 
 from fuji_server.controllers.fair_check import FAIRCheck
-from fuji_server.models.harvest import Harvest  # noqa: E501
-from fuji_server.models.harvest_results import HarvestResults  # noqa: E501
-from fuji_server.models.harvest_results_metadata import HarvestResultsMetadata  # noqa: E501
+from fuji_server.models.harvest import Harvest
+from fuji_server.models.harvest_results import HarvestResults
+from fuji_server.models.harvest_results_metadata import HarvestResultsMetadata
 
 
-def harvest_by_id(body=None):  # noqa: E501
+def harvest_by_id(body=None):
     """harvest_by_id
     Harvest metadata of a data object based on its identifier # noqa: E501
@@ -42,7 +42,7 @@ def harvest_by_id(body=None):  # noqa: E501
     :rtype: HarvestResults
     """
     if connexion.request.is_json:
-        body = Harvest.from_dict(connexion.request.get_json())  # noqa: E501
+        body = Harvest.from_dict(connexion.request.get_json())
     identifier = body.object_identifier
     auth_token = body.auth_token
     auth_token_type = body.auth_token_type
2 changes: 1 addition & 1 deletion fuji_server/evaluators/fair_evaluator_file_format.py

@@ -91,7 +91,7 @@ def setFileFormatDict(self):
             guessed_mime_type = mimetypes.guess_type(data_file.get("url"))
             mime_type = guessed_mime_type[
                 0
-            ]  # the return value is a tuple (type, encoding) where type is None if the type cant be guessed
+            ]  # the return value is a tuple (type, encoding) where type is None if the type can`t be guessed
             if mime_type:
                 self.logger.info(f"FsF-R1.3-02D : Mime type guess return value -: {mime_type}")
             else:
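
For context on the comment being touched up here: `mimetypes.guess_type` returns a `(type, encoding)` tuple, which is why the code indexes element `0`. A quick sketch of the behavior:

    import mimetypes

    # Returns (type, encoding); type is None for unknown extensions,
    # and encoding is set for compressed files.
    mimetypes.guess_type("https://example.org/data/table.csv")  # ('text/csv', None)
    mimetypes.guess_type("table.csv.gz")                        # ('text/csv', 'gzip')
    mimetypes.guess_type("mystery.unknownext")                  # (None, None)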
2 changes: 1 addition & 1 deletion fuji_server/evaluators/fair_evaluator_minimal_metadata.py

@@ -133,7 +133,7 @@ def testCoreDescriptiveMetadataAvailable(self):
             core_missing = list(set(self.required_metadata_properties) - set(self.metadata_found))
             self.logger.warning(
                 self.metric_identifier
-                + f" : Not all required core descriptive metadata elements exist, missing -: {str(core_missing)}"
+                + f" : Not all required core descriptive metadata elements exist, missing -: {core_missing!s}"
             )
         return test_status
 
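This `{str(x)}` to `{x!s}` rewrite, repeated in the harvester, RDF, RSS/Atom, and request-helper hunks below, looks like the output of ruff's RUF010 rule (explicit f-string type conversion): inside an f-string the `!s` flag applies `str()` to the value, so both forms render identically and the flag avoids a nested call. For example:

    core_missing = {"title", "license"}  # hypothetical missing fields

    # Equivalent renderings; ruff prefers the conversion flag.
    assert f"missing -: {str(core_missing)}" == f"missing -: {core_missing!s}"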
2 changes: 1 addition & 1 deletion fuji_server/harvester/data_harvester.py

@@ -109,7 +109,7 @@ def retrieve_all_data(self, scan_content=True):
             timeout = 10
             if len(ft) > self.max_number_per_mime:
                 self.logger.warning(
-                    f"FsF-F3-01M : Found more than -: {str(self.max_number_per_mime)} data links (out of {str(len(ft))}) of type {fmime} will only take {str(self.max_number_per_mime)}"
+                    f"FsF-F3-01M : Found more than -: {self.max_number_per_mime!s} data links (out of {len(ft)!s}) of type {fmime} will only take {self.max_number_per_mime!s}"
                 )
                 files_to_check = ft[: self.max_number_per_mime]
                 # add the fifth one for compatibility reasons < f-uji 3.0.1, when we took the last of list of length FILES_LIMIT
8 changes: 4 additions & 4 deletions fuji_server/harvester/metadata_harvester.py

@@ -1046,7 +1046,7 @@ def retrieve_metadata_external_rdf_negotiated(self, target_url_list=[]):
                 if rdf_dict:
                     self.logger.log(
                         self.LOG_SUCCESS,
-                        f"FsF-F2-01M : Found Linked Data metadata -: {str(rdf_dict.keys())}",
+                        f"FsF-F2-01M : Found Linked Data metadata -: {rdf_dict.keys()!s}",
                     )
                     # self.metadata_sources.append((source_rdf, 'negotiated'))
                     self.add_metadata_source(source_rdf)
@@ -1194,7 +1194,7 @@ def retrieve_metadata_external_oai_ore(self):
             source_ore, ore_dict = ore_atom_collector.parse_metadata()
             ore_dict = self.exclude_null(ore_dict)
             if ore_dict:
-                self.logger.log(self.LOG_SUCCESS, f"FsF-F2-01M : Found OAI ORE metadata -: {str(ore_dict.keys())}")
+                self.logger.log(self.LOG_SUCCESS, f"FsF-F2-01M : Found OAI ORE metadata -: {ore_dict.keys()!s}")
                 self.add_metadata_source(source_ore)
                 self.merge_metadata(
                     ore_dict,
@@ -1224,7 +1224,7 @@ def retrieve_metadata_external_datacite(self):
                 # self.metadata_sources.append((source_dcitejsn, 'negotiated'))
                 self.add_metadata_source(source_dcitejsn)
                 self.logger.log(
-                    self.LOG_SUCCESS, f"FsF-F2-01M : Found Datacite metadata -: {str(dcitejsn_dict.keys())}"
+                    self.LOG_SUCCESS, f"FsF-F2-01M : Found Datacite metadata -: {dcitejsn_dict.keys()!s}"
                 )
 
                 self.namespace_uri.extend(dcite_collector.getNamespaces())
@@ -1328,7 +1328,7 @@ def retrieve_metadata_external_linked_metadata(self):
             if rdf_dict:
                 self.logger.log(
                     self.LOG_SUCCESS,
-                    f"FsF-F2-01M : Found Linked Data (RDF) metadata -: {str(rdf_dict.keys())}",
+                    f"FsF-F2-01M : Found Linked Data (RDF) metadata -: {rdf_dict.keys()!s}",
                 )
                 # self.metadata_sources.append((source_rdf, metadata_link['source']))
                 self.add_metadata_source(source_rdf)
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper.py

@@ -28,7 +28,7 @@ class Sources(enum.Enum):
     GOOGLE_DATASET = "Google Dataset Search"
     MENDELEY_DATA = "Mendeley Data"
 
-    def __init__(self, logger: logging.Logger = None):
+    def __init__(self, logger: logging.Logger | None = None):
         """
         Parameters
         ----------
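
The `logging.Logger = None` to `logging.Logger | None = None` change here, and in the helper and model files below, fixes an implicit-Optional annotation: under PEP 484, a default of `None` does not widen the declared type, so the old annotation claimed a `Logger` while accepting `None`. The `X | None` union syntax (PEP 604) requires Python 3.10 or newer in annotations evaluated at runtime, consistent with the 3.11 base image above. Sketch:

    import logging

    def old_style(logger: logging.Logger = None):          # annotation says Logger, default is None
        ...

    def new_style(logger: logging.Logger | None = None):   # explicit optional, PEP 604
        ...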
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper_datacite.py

@@ -23,7 +23,7 @@ class MetaDataCatalogueDataCite(MetaDataCatalogue):
     islisted = False
     apiURI = "https://api.datacite.org/dois"
 
-    def __init__(self, logger: logging.Logger = None):
+    def __init__(self, logger: logging.Logger | None = None):
         """
         Parameters
         ----------
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper_google_datasearch.py

@@ -32,7 +32,7 @@ class MetaDataCatalogueGoogleDataSearch(MetaDataCatalogue):
     """
 
     # apiURI = 'https://api.datacite.org/dois'
-    def __init__(self, logger: logging.Logger = None, object_type=None):
+    def __init__(self, logger: logging.Logger | None = None, object_type=None):
         self.islisted = False
 
         self.logger = logger
2 changes: 1 addition & 1 deletion fuji_server/helper/catalogue_helper_mendeley_data.py

@@ -23,7 +23,7 @@ class MetaDataCatalogueMendeleyData(MetaDataCatalogue):
     islisted = False
     apiURI = "https://api.datasearch.elsevier.com/api/v2/search?query="
 
-    def __init__(self, logger: logging.Logger = None):
+    def __init__(self, logger: logging.Logger | None = None):
         self.logger = logger
         self.source = self.getEnumSourceNames().MENDELEY_DATA.value
 
5 changes: 4 additions & 1 deletion fuji_server/helper/metadata_collector.py

@@ -228,7 +228,10 @@ class MetaDataCollector:
     metadata_mapping: Mapper | None
 
     def __init__(
-        self, sourcemetadata: dict = None, mapping: metadata_mapper.Mapper = None, logger: logging.Logger = None
+        self,
+        sourcemetadata: dict | None = None,
+        mapping: metadata_mapper.Mapper = None,
+        logger: logging.Logger | None = None,
     ):
         """
         Parameters
6 changes: 2 additions & 4 deletions fuji_server/helper/metadata_collector_dublincore.py

@@ -150,13 +150,11 @@ def parse_metadata(self):
                 if k.lower() in dcterms:
                     # self.logger.info('FsF-F2-01M: DublinCore metadata element, %s = %s , ' % (k, v))
                     try:
-                        elem = [
+                        elem = next(
                             key
                             for (key, value) in Mapper.DC_MAPPING.value.items()
                             if k.lower() in str(value).lower()
-                        ][
-                            0
-                        ]  # fuji ref fields
+                        )  # fuji ref fields
                     except Exception:
                         # nothing found so just continue
                         pass
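
Swapping `[...][0]` for `next(...)` (ruff's RUF015, which flags unnecessary list allocation just to take the first element) stops materializing every match when only the first is needed. One behavioral difference: an empty result raises `IndexError` under the old form but `StopIteration` under the new one; both are caught by the surrounding `except Exception`, so the rewrite is safe here. A toy illustration (the mapping is a stand-in for `Mapper.DC_MAPPING.value`):

    dc_mapping = {"title": "dc.title", "rights": ["dc.rights", "dcterms.license"]}
    k = "DC.Rights"

    # Old: builds the full list of matches, then indexes the first.
    elem = [key for (key, value) in dc_mapping.items() if k.lower() in str(value).lower()][0]

    # New: a generator stops at the first match; no intermediate list.
    elem = next(key for (key, value) in dc_mapping.items() if k.lower() in str(value).lower())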
8 changes: 3 additions & 5 deletions fuji_server/helper/metadata_collector_rdf.py

@@ -419,9 +419,7 @@ def parse_metadata(self):
                         if not RDFparsed:
                             continue
                         else:
-                            self.logger.warning(
-                                f"FsF-F2-01M : Failed to parse RDF -: {self.target_url} {str(e)}"
-                            )
+                            self.logger.warning(f"FsF-F2-01M : Failed to parse RDF -: {self.target_url} {e!s}")
                 else:
                     self.logger.info(
                         "FsF-F2-01M : Seems to be HTML not RDF, therefore skipped parsing RDF from -: %s"
@@ -951,8 +949,8 @@ def get_schemaorg_metadata_from_graph(self, graph):
                 if "Dataset" in cand_creative_work:
                     creative_work = cand_creative_work["Dataset"]
                 else:
-                    creative_work = cand_creative_work[list(cand_creative_work)[0]]
-                    creative_work_type = list(cand_creative_work)[0]
+                    creative_work = cand_creative_work[next(iter(cand_creative_work))]
+                    creative_work_type = next(iter(cand_creative_work))
 
             except Exception as e:
                 self.logger.info("FsF-F2-01M : Schema.org RDF graph parsing failed -: " + str(e))
2 changes: 1 addition & 1 deletion fuji_server/helper/metadata_provider_rss_atom.py

@@ -77,7 +77,7 @@ def getMetadataStandards(self):
         except Exception as e:
             print("RSS Error ", e)
             self.logger.info(
-                f"{self.metric_id} : Could not parse response retrieved from RSS/Atom Feed endpoint -: {str(e)}"
+                f"{self.metric_id} : Could not parse response retrieved from RSS/Atom Feed endpoint -: {e!s}"
             )
 
         return schemas
5 changes: 2 additions & 3 deletions fuji_server/helper/preprocessor.py

@@ -24,7 +24,6 @@
 import mimetypes
 import os
 import time
-from typing import Any
 from urllib.parse import urlparse
 
 import requests
@@ -61,7 +60,7 @@ class Preprocessor:
     long_term_file_formats = {}
     open_file_formats = {}
     access_rights = {}
-    re3repositories: dict[Any, Any] = {}
+    re3repositories: dict[str, str] = {}
     linked_vocabs = {}
     linked_vocab_index = {}
     default_namespaces = []
@@ -487,7 +486,7 @@ def retrieve_linkedvocabs(cls, lov_api, lodcloud_api, isDebugMode):
         broken = []
         cls.logger.info(f"{len(raw_lov)} vocabs specified at {lov_api}")
        for lov in raw_lov:
-            title = [i.get("value") for i in lov.get("titles") if i.get("lang") == "en"][0]
+            title = next(i.get("value") for i in lov.get("titles") if i.get("lang") == "en")
             uri = lov.get("uri")
             nsp = lov.get("nsp")
             if uri and nsp:
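
Narrowing `dict[Any, Any]` to `dict[str, str]` lets the unused `typing.Any` import go and gives type checkers something concrete to verify. The entry below is an assumed illustration of the shape the new annotation implies (re3data IDs mapped to repository names), not code from this repository:

    # Assumed shape implied by the annotation: re3data ID -> repository name.
    re3repositories: dict[str, str] = {"r3d100010134": "PANGAEA"}

    # A checker such as mypy can now reject mis-typed entries:
    # re3repositories[42] = None   # error under dict[str, str], fine under dict[Any, Any]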
6 changes: 3 additions & 3 deletions fuji_server/helper/request_helper.py

@@ -502,7 +502,7 @@ def content_negotiate(self, metric_id="", ignore_html=True):
                         self.logger.warning(f"{metric_id} : Content-type is NOT SPECIFIED")
                 else:
                     self.logger.warning(
-                        f"{metric_id} : NO successful response received, status code -: {str(status_code)}"
+                        f"{metric_id} : NO successful response received, status code -: {status_code!s}"
                     )
                 tp_response.close()
             else:
@@ -512,12 +512,12 @@ def content_negotiate(self, metric_id="", ignore_html=True):
         # except requests.exceptions.SSLError as e:
         except urllib.error.HTTPError as e:
             self.logger.warning(
-                f"{metric_id} : Content negotiation failed -: accept={self.accept_type}, status={str(e.code)} "
+                f"{metric_id} : Content negotiation failed -: accept={self.accept_type}, status={e.code!s} "
             )
             self.response_status = int(e.code)
         except urllib.error.URLError as e:
             self.logger.warning(f"{metric_id} : RequestException -: {e.reason} : {self.request_url}")
         except Exception as e:
             print(e, "Request helper")
-            self.logger.warning(f"{metric_id} : Request Failed -: {str(e)} : {self.request_url}")
+            self.logger.warning(f"{metric_id} : Request Failed -: {e!s} : {self.request_url}")
         return format, self.parse_response
2 changes: 1 addition & 1 deletion fuji_server/models/any_of_fair_results_items.py

@@ -8,7 +8,7 @@ class AnyOfFAIRResultsResultsItems(Model):
     Do not edit the class manually.
     """
 
-    def __init__(self):  # noqa: E501
+    def __init__(self):
        """AnyOfFAIRResultsResultsItems - a model defined in Swagger"""
         self.swagger_types = {}
 
20 changes: 10 additions & 10 deletions fuji_server/models/body.py

@@ -10,16 +10,16 @@ class Body(Model):
 
     def __init__(
         self,
-        object_identifier: str = None,
+        object_identifier: str | None = None,
         test_debug: bool = False,
-        metadata_service_endpoint: str = None,
-        metadata_service_type: str = None,
-        use_datacite: bool = None,
-        metric_version: str = None,
-        auth_token: str = None,
-        auth_token_type: str = None,
-        oaipmh_endpoint: str = None,
-    ):  # noqa: E501
+        metadata_service_endpoint: str | None = None,
+        metadata_service_type: str | None = None,
+        use_datacite: bool | None = None,
+        metric_version: str | None = None,
+        auth_token: str | None = None,
+        auth_token_type: str | None = None,
+        oaipmh_endpoint: str | None = None,
+    ):
         """Body - a model defined in Swagger
 
         :param object_identifier: The object_identifier of this Body. # noqa: E501
@@ -106,7 +106,7 @@ def object_identifier(self, object_identifier: str):
         :type object_identifier: str
         """
         if object_identifier is None:
-            raise ValueError("Invalid value for `object_identifier`, must not be `None`")  # noqa: E501
+            raise ValueError("Invalid value for `object_identifier`, must not be `None`")
 
         self._object_identifier = object_identifier
 
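Even though every constructor parameter is now annotated `| None`, the required-field contract survives in the setter shown in the second hunk: assigning `None` to `object_identifier` raises at runtime. A hedged usage sketch (the DOI is just the project's own Zenodo record, used here as an example identifier):

    from fuji_server.models.body import Body

    body = Body(object_identifier="https://doi.org/10.5281/zenodo.4063720")
    body.object_identifier = None
    # -> ValueError: Invalid value for `object_identifier`, must not be `None`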