From 12b1d802656947b2e61f1748e02489fbb905f4f4 Mon Sep 17 00:00:00 2001 From: Birger Schacht Date: Mon, 29 Jan 2024 14:38:47 +0100 Subject: [PATCH] refactor(utils): replace URI_replace_settings.yml with clean_uri.toml ... and move the `clean_uri.toml` to the `utils` folder Closes: #572 --- .../default_settings/URI_replace_settings.yml | 7 ------- apis_core/utils/clean_uri.toml | 7 +++++++ apis_core/utils/normalize.py | 19 ++++++++----------- apis_core/utils/settings.py | 3 ++- 4 files changed, 17 insertions(+), 19 deletions(-) delete mode 100644 apis_core/default_settings/URI_replace_settings.yml create mode 100644 apis_core/utils/clean_uri.toml diff --git a/apis_core/default_settings/URI_replace_settings.yml b/apis_core/default_settings/URI_replace_settings.yml deleted file mode 100644 index 127a2a3bf..000000000 --- a/apis_core/default_settings/URI_replace_settings.yml +++ /dev/null @@ -1,7 +0,0 @@ -mappings: - - domain: geonames.org - regex: '^https?://(?:[^.]*[.])?geonames[.]org/([0-9]+)' - replace: 'https://sws.geonames.org/{}/' - - domain: d-nb.info - regex: '^https?://(?:[^.]*[.])?d-nb.info/gnd/([0-9A-Za-u\-]+)' - replace: 'https://d-nb.info/gnd/{}' \ No newline at end of file diff --git a/apis_core/utils/clean_uri.toml b/apis_core/utils/clean_uri.toml new file mode 100644 index 000000000..61e2101bf --- /dev/null +++ b/apis_core/utils/clean_uri.toml @@ -0,0 +1,7 @@ +[geonames_org] +regex = '^https?://(?:[^.]*[.])?geonames[.]org/([0-9]+)' +replace = 'https://sws.geonames.org/{}/' + +[d-nb_info] +regex = '^https?://(?:[^.]*[.])?d-nb.info/gnd/([0-9A-Za-u\-]+)' +replace = 'https://d-nb.info/gnd/{}' diff --git a/apis_core/utils/normalize.py b/apis_core/utils/normalize.py index 8b5012046..6737b70cf 100644 --- a/apis_core/utils/normalize.py +++ b/apis_core/utils/normalize.py @@ -2,19 +2,16 @@ # SPDX-License-Identifier: MIT import re +import tomllib from apis_core.utils.settings import clean_uri_mapping_file -from yaml import safe_load def clean_uri(uri: str) -> str: - if uri: - settings = safe_load(clean_uri_mapping_file().read_text()) - for mapping in settings.get("mappings", []): - domain = mapping["domain"] - regex = mapping["regex"] - replace = mapping["replace"] - if domain in uri: - m = re.match(regex, uri) - if m: - uri = replace.format(m.group(1)) + settings = tomllib.loads(clean_uri_mapping_file().read_text()) + if uri is not None: + for entry in settings.values(): + regex = entry["regex"] + replace = entry["replace"] + if m := re.match(regex, uri): + uri = replace.format(m.group(1)) return uri diff --git a/apis_core/utils/settings.py b/apis_core/utils/settings.py index f5571eaa9..0e6a22917 100644 --- a/apis_core/utils/settings.py +++ b/apis_core/utils/settings.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2023 Birger Schacht # SPDX-License-Identifier: MIT +import os from pathlib import Path from django.conf import settings @@ -11,7 +12,7 @@ def default_settings() -> Path: def clean_uri_mapping_file() -> Path: - default = default_settings() / "URI_replace_settings.yml" + default = Path(__file__).parent / "clean_uri.toml" mapping_file = getattr(settings, "CLEANURI_MAPPINGS", default) return Path(mapping_file)