From 4f7a02ae46c3ceb1e01eb82aa6d2c8a08cc21cf5 Mon Sep 17 00:00:00 2001 From: Heinz-Alexander Fuetterer Date: Thu, 25 Jan 2024 13:44:36 +0100 Subject: [PATCH] feat!: add xsdata models --- .pre-commit-config.yaml | 12 +- README.md | 2 +- pyproject.toml | 10 +- src/oaipmh_scythe/__init__.py | 4 +- src/oaipmh_scythe/client.py | 58 +- src/oaipmh_scythe/iterator.py | 119 +- src/oaipmh_scythe/models.py | 280 --- src/oaipmh_scythe/models/.xsdata.xml | 41 + src/oaipmh_scythe/models/__init__.py | 21 + src/oaipmh_scythe/models/datacite.py | 1675 +++++++++++++++++ src/oaipmh_scythe/models/mixins.py | 59 + src/oaipmh_scythe/models/oai_dc.py | 261 +++ src/oaipmh_scythe/models/oai_pmh.py | 648 +++++++ src/oaipmh_scythe/response.py | 133 +- src/oaipmh_scythe/utils.py | 87 +- tests/conftest.py | 18 + tests/integration/test_get_record.py | 14 +- tests/integration/test_identify.py | 4 +- tests/integration/test_list_identifiers.py | 27 +- .../integration/test_list_metadata_formats.py | 9 +- tests/integration/test_list_records.py | 49 +- tests/integration/test_list_sets.py | 7 +- tests/unit/test_client.py | 129 +- tests/unit/test_iterator.py | 12 +- tests/unit/test_models.py | 321 +--- tests/unit/test_response.py | 56 +- tests/unit/test_utils.py | 58 +- 27 files changed, 3233 insertions(+), 881 deletions(-) delete mode 100644 src/oaipmh_scythe/models.py create mode 100644 src/oaipmh_scythe/models/.xsdata.xml create mode 100644 src/oaipmh_scythe/models/__init__.py create mode 100644 src/oaipmh_scythe/models/datacite.py create mode 100644 src/oaipmh_scythe/models/mixins.py create mode 100644 src/oaipmh_scythe/models/oai_dc.py create mode 100644 src/oaipmh_scythe/models/oai_pmh.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a011828..4f79e5a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -72,9 +72,11 @@ repos: - id: mypy args: [--config-file=pyproject.toml] additional_dependencies: - - httpx==0.26.0 - - lxml-stubs==0.5.1 - exclude: tests + - httpx>=0.27 + - lxml-stubs>=0.5 + - pytest>=8.1 + - xsdata>=24.4 + exclude: tests # TODO: remove this exclusion - repo: https://github.com/scientific-python/cookie rev: d43e497727162f7edcfade1022237565c88d5347 # frozen: 2024.03.10 @@ -86,7 +88,9 @@ repos: hooks: - id: typos args: [--force-exclude] - exclude: CHANGELOG.md # the commit hashes in changelog trigger the spell checker + # CHANGELOG.md: the commit hashes in changelog trigger the spell checker + # src/oaipmh_scythe/models: autogenerated python modules by xsdata + exclude: ^CHANGELOG.md|^src/oaipmh_scythe/models/.* - repo: https://github.com/FHPythonUtils/LicenseCheck/ rev: b2b50f4d40c95b15478279a7a00553a1dc2925ef # frozen: 2024.2 diff --git a/README.md b/README.md index e604565..0815c8d 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ with Scythe("https://zenodo.org/oai2d") as scythe: `oaipmh-scythe` is built with: - [httpx](https://github.com/encode/httpx) for issuing HTTP requests -- [lxml](https://github.com/lxml/lxml) for parsing XML responses +- [xsdata](https://github.com/tefra/xsdata) for parsing XML responses ## Installation diff --git a/pyproject.toml b/pyproject.toml index 0b47ba6..d056933 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dynamic = [ ] dependencies = [ "httpx>=0.25", - "lxml>=5.1", + "xsdata[cli,lxml]", # TODO: remove cli extra ] [project.optional-dependencies] dev = [ @@ -169,6 +169,13 @@ pydocstyle.convention = "google" "src/oaipmh_scythe/client.py" = [ "PLR0913", # too-many-arguments ] +"src/oaipmh_scythe/models/*" = [ + "D101", # undocumented-public-class + "D106", # undocumented-public-nested-class + "D205", + "D415", + "RUF002", +] "tests/*" = [ "D100", # undocumented-public-module "D103", # undocumented-public-function @@ -212,6 +219,7 @@ parallel = true source = ["oaipmh_scythe"] omit = [ "__about__.py", + "src/oaipmh_scythe/models/datacite.py", ] [tool.coverage.report] diff --git a/src/oaipmh_scythe/__init__.py b/src/oaipmh_scythe/__init__.py index c0135b6..d130b31 100644 --- a/src/oaipmh_scythe/__init__.py +++ b/src/oaipmh_scythe/__init__.py @@ -6,9 +6,9 @@ """oaipmh-scythe: A Scythe for harvesting OAI-PMH repositories.""" from oaipmh_scythe.client import Scythe -from oaipmh_scythe.response import OAIResponse +from oaipmh_scythe.response import Response __all__ = [ "Scythe", - "OAIResponse", + "Response", ] diff --git a/src/oaipmh_scythe/client.py b/src/oaipmh_scythe/client.py index b5856c8..2ebe67f 100644 --- a/src/oaipmh_scythe/client.py +++ b/src/oaipmh_scythe/client.py @@ -21,8 +21,8 @@ from oaipmh_scythe.__about__ import __version__ from oaipmh_scythe.iterator import BaseOAIIterator, OAIItemIterator -from oaipmh_scythe.models import Header, Identify, MetadataFormat, OAIItem, Record, Set -from oaipmh_scythe.response import OAIResponse +from oaipmh_scythe.models import Header, Identify, MetadataFormat, Record, Set, Verb +from oaipmh_scythe.response import Response, _build_response from oaipmh_scythe.utils import filter_dict_except_resumption_token, log_response, remove_none_values if TYPE_CHECKING: @@ -34,18 +34,6 @@ logger = logging.getLogger(__name__) USER_AGENT: str = f"oaipmh-scythe/{__version__}" -OAI_NAMESPACE: str = "{http://www.openarchives.org/OAI/2.0/}" - - -# Map OAI verbs to class representations -DEFAULT_CLASS_MAP = { - "GetRecord": Record, - "ListRecords": Record, - "ListIdentifiers": Header, - "ListSets": Set, - "ListMetadataFormats": MetadataFormat, - "Identify": Identify, -} class Scythe: @@ -82,7 +70,6 @@ def __init__( max_retries: int = 0, retry_status_codes: Iterable[int] | None = None, default_retry_after: int = 60, - class_mapping: dict[str, type[OAIItem]] | None = None, encoding: str = "utf-8", auth: AuthTypes | None = None, timeout: int = 60, @@ -98,8 +85,6 @@ def __init__( self.max_retries = max_retries self.retry_status_codes = retry_status_codes or (503,) self.default_retry_after = default_retry_after - self.oai_namespace = OAI_NAMESPACE - self.class_mapping = class_mapping or DEFAULT_CLASS_MAP self.encoding = encoding self.auth = auth self.timeout = timeout @@ -149,7 +134,7 @@ def __exit__( ) -> None: self.close() - def harvest(self, query: dict[str, str]) -> OAIResponse: + def harvest(self, query: dict[str, str]) -> Response: """Perform an HTTP request to the OAI server with the given parameters. Send an OAI-PMH request to the server using the specified parameters. Handle retry logic @@ -171,8 +156,8 @@ def harvest(self, query: dict[str, str]) -> OAIResponse: logger.warning("HTTP %d! Retrying after %d seconds...", http_response.status_code, retry_after) time.sleep(retry_after) http_response = self._request(query) - http_response.raise_for_status() - return OAIResponse(http_response, params=query) + metadata_prefix = query.get("metadataPrefix") + return _build_response(http_response, metadata_prefix) def _request(self, query: dict[str, str]) -> httpx.Response: """Send an HTTP request to the OAI server using the configured HTTP method and given query parameters. @@ -195,7 +180,7 @@ def list_records( set_: str | None = None, resumption_token: str | None = None, ignore_deleted: bool = False, - ) -> Iterator[OAIResponse | Record]: + ) -> Iterator[Response | Record]: """Issue a ListRecords request to the OAI server. Send a request to list records from the OAI server, allowing for selective harvesting based on date range, @@ -224,7 +209,7 @@ def list_records( """ _query = { - "verb": "ListRecords", + "verb": Verb.LIST_RECORDS.value, "from": from_, "until": until, "metadataPrefix": metadata_prefix, @@ -242,7 +227,7 @@ def list_identifiers( set_: str | None = None, resumption_token: str | None = None, ignore_deleted: bool = False, - ) -> Iterator[OAIResponse | Header]: + ) -> Iterator[Response | Header]: """Issue a ListIdentifiers request to the OAI server. Send a request to list record identifiers from the OAI server. This method allows filtering records based on @@ -268,10 +253,9 @@ def list_identifiers( cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server. noRecordsMatch: If no records match the provided criteria. noSetHierarchy: If set-based harvesting is requested but the OAI server does not support sets. - """ _query = { - "verb": "ListIdentifiers", + "verb": Verb.LIST_IDENTIFIERS.value, "from": from_, "until": until, "metadataPrefix": metadata_prefix, @@ -282,7 +266,7 @@ def list_identifiers( query = remove_none_values(filter_dict_except_resumption_token(_query)) yield from self.iterator(self, query, ignore_deleted=ignore_deleted) - def list_sets(self, resumption_token: str | None = None) -> Iterator[OAIResponse | Set]: + def list_sets(self, resumption_token: str | None = None) -> Iterator[Response | Set]: """Issue a ListSets request to the OAI server. Send a request to list all sets defined in the OAI server. Sets are used to categorize records in the OAI @@ -300,10 +284,9 @@ def list_sets(self, resumption_token: str | None = None) -> Iterator[OAIResponse Raises: badResumptionToken: If the provided resumption token is invalid or expired. noSetHierarchy: If the OAI server does not support sets or has no set hierarchy available. - """ _query = { - "verb": "ListSets", + "verb": Verb.LIST_SETS.value, "resumptionToken": resumption_token, } query = remove_none_values(filter_dict_except_resumption_token(_query)) @@ -321,12 +304,12 @@ def identify(self) -> Identify: Returns: Identify: An object encapsulating the server's identify response, which contains various pieces of information about the OAI server. - """ - query = {"verb": "Identify"} - return Identify(self.harvest(query)) + query = {"verb": Verb.IDENTIFY.value} + response = self.harvest(query) + return response.parsed.identify - def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> OAIResponse | Record: + def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> Response | Record: """Issue a GetRecord request to the OAI server. Send a request to the OAI server to retrieve a specific record. The request is constructed with the provided @@ -347,16 +330,16 @@ def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> OAIRes cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server for the requested record. idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server. - """ query = { - "verb": "GetRecord", + "verb": Verb.GET_RECORD.value, "identifier": identifier, "metadataPrefix": metadata_prefix, } - return next(iter(self.iterator(self, query))) + response = self.harvest(query) + return response.parsed.get_record.record - def list_metadata_formats(self, identifier: str | None = None) -> Iterator[OAIResponse | MetadataFormat]: + def list_metadata_formats(self, identifier: str | None = None) -> Iterator[Response | MetadataFormat]: """Issue a ListMetadataFormats request to the OAI server. Send a request to list the metadata formats available from the OAI server. This can be done for the entire @@ -377,10 +360,9 @@ def list_metadata_formats(self, identifier: str | None = None) -> Iterator[OAIRe Raises: idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server. noMetadataFormats: If there are no metadata formats available for the requested record or repository. - """ _query = { - "verb": "ListMetadataFormats", + "verb": Verb.LIST_METADATA_FORMATS.value, "identifier": identifier, } query = remove_none_values(_query) diff --git a/src/oaipmh_scythe/iterator.py b/src/oaipmh_scythe/iterator.py index 18cc6a5..6355c5c 100644 --- a/src/oaipmh_scythe/iterator.py +++ b/src/oaipmh_scythe/iterator.py @@ -19,25 +19,51 @@ from __future__ import annotations from abc import ABC, abstractmethod +from enum import Enum +from operator import attrgetter from typing import TYPE_CHECKING -from oaipmh_scythe import exceptions -from oaipmh_scythe.models import ResumptionToken +from oaipmh_scythe.models import Verb if TYPE_CHECKING: from collections.abc import Iterator from oaipmh_scythe import Scythe - from oaipmh_scythe.models import OAIItem - from oaipmh_scythe.response import OAIResponse - -VERBS_ELEMENTS: dict[str, str] = { - "GetRecord": "record", - "ListRecords": "record", - "ListIdentifiers": "header", - "ListSets": "set", - "ListMetadataFormats": "metadataFormat", - "Identify": "Identify", + from oaipmh_scythe.models import Item, ResumptionToken + from oaipmh_scythe.response import Response + + +class GetRecord(Enum): + attribute = "get_record" + element = "record" + + +class ListIdentifiers(Enum): + attribute = "list_identifiers" + element = "header" + + +class ListRecords(Enum): + attribute = "list_records" + element = "record" + + +class ListSets(Enum): + attribute = "list_sets" + element = "set" + + +class ListMetadataFormats(Enum): + attribute = "list_metadata_formats" + element = "metadata_format" + + +MAPPING = { + Verb.LIST_IDENTIFIERS.value: ListIdentifiers, + Verb.GET_RECORD.value: GetRecord, + Verb.LIST_RECORDS.value: ListRecords, + Verb.LIST_SETS.value: ListSets, + Verb.LIST_METADATA_FORMATS.value: ListMetadataFormats, } @@ -66,8 +92,8 @@ def __init__(self, scythe: Scythe, query: dict[str, str], ignore_deleted: bool = self.scythe = scythe self.query = query self.ignore_deleted = ignore_deleted - self.verb: str = self.query["verb"] - self.oai_response: OAIResponse | None = None + self.verb = self.query["verb"] + self.response: Response | None = None self.resumption_token: ResumptionToken | None = None self._next_response() @@ -87,18 +113,14 @@ def _get_resumption_token(self) -> ResumptionToken | None: Returns: A ResumptionToken instance if a token is found in the response, otherwise None. """ - ns = self.scythe.oai_namespace - if ( - self.oai_response is not None - and (token_element := self.oai_response.xml.find(f".//{ns}resumptionToken")) is not None - ): - return ResumptionToken( - token=token_element.text, - cursor=token_element.attrib.get("cursor"), # type: ignore [arg-type] - complete_list_size=token_element.attrib.get("completeListSize"), # type: ignore [arg-type] - expiration_date=token_element.attrib.get("expirationDate"), # type: ignore [arg-type] - ) - return None + if self.response is None: + return None + try: + attribute = MAPPING[self.verb].attribute.value + parsed_data = getattr(self.response.parsed, attribute) + return parsed_data.resumption_token + except AttributeError: + return None def _next_response(self) -> None: """Request the next batch of data from the OAI server using the current resumption token. @@ -108,22 +130,13 @@ def _next_response(self) -> None: If an error is encountered in the OAI response, an appropriate exception is raised. """ - if self.resumption_token and self.resumption_token.token: - self.query = {"verb": self.verb, "resumptionToken": self.resumption_token.token} - self.oai_response = self.scythe.harvest(self.query) - - if (error := self.oai_response.xml.find(f".//{self.scythe.oai_namespace}error")) is not None: - code = str(error.attrib.get("code", "UNKNOWN")) - description = error.text or "" - try: - exception_name = code[0].upper() + code[1:] - raise getattr(exceptions, exception_name)(description) - except AttributeError as exc: - raise exceptions.GeneralOAIPMHError(description) from exc + if self.resumption_token is not None: + self.query = {"verb": self.verb, "resumptionToken": self.resumption_token.value} + self.response = self.scythe.harvest(self.query) self.resumption_token = self._get_resumption_token() -class OAIResponseIterator(BaseOAIIterator): +class ResponseIterator(BaseOAIIterator): """An iterator class for iterating over OAI responses obtained via the OAI-PMH protocol. This iterator specifically handles the iteration of OAIResponse objects, allowing for seamless @@ -131,7 +144,7 @@ class OAIResponseIterator(BaseOAIIterator): underlying mechanisms of the BaseOAIIterator, including handling of resumption tokens for paginated data. """ - def __iter__(self) -> Iterator[OAIResponse]: + def __iter__(self) -> Iterator[Response]: """Yield the next OAIResponse object from the server response sequence. Enable the OAIResponseIterator to iterate over a series of OAIResponse objects, managing pagination @@ -141,10 +154,10 @@ def __iter__(self) -> Iterator[OAIResponse]: OAIResponse: The next available OAIResponse object in the sequence. """ while True: - if self.oai_response: - yield self.oai_response - self.oai_response = None - elif self.resumption_token and self.resumption_token.token: + if self.response: + yield self.response + self.response = None + elif self.resumption_token: self._next_response() else: return @@ -164,8 +177,9 @@ class OAIItemIterator(BaseOAIIterator): def __init__(self, scythe: Scythe, query: dict[str, str], ignore_deleted: bool = False) -> None: self.verb = query["verb"] - self.mapper = scythe.class_mapping[self.verb] - self.element = VERBS_ELEMENTS[self.verb] + attribute = MAPPING[query["verb"]].attribute.value + element = MAPPING[query["verb"]].element.value + self.items_getter = attrgetter(f"{attribute}.{element}") super().__init__(scythe, query, ignore_deleted) def _next_response(self) -> None: @@ -175,12 +189,12 @@ def _next_response(self) -> None: for the specific elements (e.g. records, headers) based on the current resumption token. """ super()._next_response() - if self.oai_response is not None: - self._items = self.oai_response.xml.iterfind(f".//{self.scythe.oai_namespace}{self.element}") + if self.response is not None: + self._items = self.items_getter(self.response.parsed) else: self._items = iter(()) - def __iter__(self) -> Iterator[OAIItem]: + def __iter__(self) -> Iterator[Item]: """Iterate over individual OAI items from the response. Go through the items in the OAI-PMH response, applying any necessary mapping and handling @@ -191,11 +205,10 @@ def __iter__(self) -> Iterator[OAIItem]: """ while True: for item in self._items: - mapped = self.mapper(item) - if self.ignore_deleted and mapped.deleted: + if self.ignore_deleted and item.deleted: continue - yield mapped - if self.resumption_token and self.resumption_token.token: + yield item + if self.resumption_token: self._next_response() else: return diff --git a/src/oaipmh_scythe/models.py b/src/oaipmh_scythe/models.py deleted file mode 100644 index f2464d9..0000000 --- a/src/oaipmh_scythe/models.py +++ /dev/null @@ -1,280 +0,0 @@ -# SPDX-FileCopyrightText: 2015 Mathias Loesch -# SPDX-FileCopyrightText: 2023 Heinz-Alexander Fütterer -# -# SPDX-License-Identifier: BSD-3-Clause - -"""The models module defines data structures for representing various components of the OAI-PMH protocol. - -This module includes classes that encapsulate different entities in OAI-PMH, such as resumption tokens and -various types of OAI items. These classes provide structured representations of OAI-PMH elements, -facilitating their manipulation and processing in client applications. - -Classes: - ResumptionToken: Represents a resumption token used in OAI-PMH for paginated data retrieval. - OAIItem: A base class for generic OAI items. - Identify: Represents an Identify response in OAI-PMH. - Header: Represents an OAI Header element. - Record: Represents an OAI Record element. - Set: Represents an OAI Set element. - MetadataFormat: Represents an OAI MetadataFormat element. -""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import TYPE_CHECKING - -from lxml import etree - -from oaipmh_scythe.utils import get_namespace, xml_to_dict - -if TYPE_CHECKING: - from collections.abc import Iterator - - from oaipmh_scythe.response import OAIResponse - - -@dataclass -class ResumptionToken: - """A data class representing a resumption token in the OAI-PMH protocol. - - Resumption tokens are used for iterating over multiple sets of results in OAI-PMH - harvest requests. This class encapsulates the typical components of a resumption token, - including the token itself, cursor, complete list size, and an expiration date. - - Attributes: - token: The actual resumption token used for continuing the iteration in subsequent OAI-PMH requests. - Default is None. - cursor: A marker indicating the current position in the list of results. Default is None. - complete_list_size: The total number of records in the complete list of results. Default is None. - expiration_date: The date and time when the resumption token expires. Default is None. - """ - - token: str | None = None - cursor: str | None = None - complete_list_size: str | None = None - expiration_date: str | None = None - - def __repr__(self) -> str: - return f"" - - -class OAIItem: - """A base class representing a generic item in the OAI-PMH protocol. - - This class provides a common structure for handling and manipulating XML data - associated with different types of OAI-PMH items, such as records, headers, or sets. - - Attributes: - xml: The parsed XML element representing the OAI item. - _strip_ns: A flag indicating whether to remove the namespaces from the element names - in the dictionary representation. - _oai_namespace: The namespace URI extracted from the XML element. - """ - - def __init__(self, xml: etree._Element, strip_ns: bool = True) -> None: - super().__init__() - self.xml = xml - self._strip_ns = strip_ns - self._oai_namespace = get_namespace(self.xml) - - def __bytes__(self) -> bytes: - return etree.tostring(self.xml, encoding="utf-8") - - def __str__(self) -> str: - return etree.tostring(self.xml, encoding="unicode") - - @property - def raw(self) -> str: - """Return the original XML as a unicode string.""" - return etree.tostring(self.xml, encoding="unicode") - - -class Identify(OAIItem): - """A class representing an Identify container in the OAI-PMH protocol. - - This class is specifically used for handling the response of an Identify request in OAI-PMH. - It differs from other OAI entities in that it is initialized with an OAIResponse object - rather than a direct XML element. The class parses the Identify information from the - response and provides access to its individual components. - - Args: - identify_response: The response object from an Identify request. - It should contain the XML representation of the Identify response. - - Attributes: - xml: The XML element representing the Identify response. - _identify_dict: A dictionary containing the parsed Identify information. - Dynamic Attributes: Based on the content of the Identify response, additional attributes - are dynamically set on this object. These can include attributes like - repository name, base URL, protocol version, etc. - - Raises: - ValueError: If the Identify element is not found in the provided XML. - """ - - def __init__(self, identify_response: OAIResponse) -> None: - super().__init__(identify_response.xml, strip_ns=True) - identify_element = self.xml.find(f".//{self._oai_namespace}Identify") - if identify_element is None: - raise ValueError("Identify element not found in the XML.") - self.xml = identify_element - self._identify_dict = xml_to_dict(self.xml, strip_ns=True) - for k, v in self._identify_dict.items(): - setattr(self, k.replace("-", "_"), v[0]) - - def __repr__(self) -> str: - return "" - - def __iter__(self) -> Iterator: - """Iterate over the Identify information, yielding key-value pairs.""" - return iter(self._identify_dict.items()) - - -class Header(OAIItem): - """A class representing an OAI Header in the OAI-PMH protocol. - - The header contains essential information about a record, such as its identifier, datestamp, - and set specifications. This class parses these details from the provided XML header element - and makes them easily accessible as attributes. - - Args: - header_element: The XML element representing the OAI header. - - Attributes: - deleted: Indicates whether the record is marked as deleted in the OAI-PMH repository. - identifier: The unique identifier of the record in the OAI-PMH repository. - datestamp: The datestamp of the record, indicating when it was last updated. - setSpecs: A list of set specifications that the record belongs to. - """ - - def __init__(self, header_element: etree._Element) -> None: - super().__init__(header_element, strip_ns=True) - self.deleted = self.xml.attrib.get("status") == "deleted" - _identifier_element = self.xml.find(f"{self._oai_namespace}identifier") - _datestamp_element = self.xml.find(f"{self._oai_namespace}datestamp") - - self.identifier = getattr(_identifier_element, "text", None) - self.datestamp = getattr(_datestamp_element, "text", None) - self.setSpecs = [setSpec.text for setSpec in self.xml.findall(f"{self._oai_namespace}setSpec")] - - def __repr__(self) -> str: - return f"
" - - def __iter__(self) -> Iterator: - """Iterate over the header information, yielding key-value pairs.""" - return iter( - [ - ("identifier", self.identifier), - ("datestamp", self.datestamp), - ("setSpecs", self.setSpecs), - ] - ) - - -class Record(OAIItem): - """A class representing an OAI record in the OAI-PMH protocol. - - This class encapsulates a record element from an OAI-PMH response, handling its parsing, and providing - structured access to its details, such as header information and metadata. It checks for the presence of - the header and metadata elements and raises an error if the header is not found. - - Args: - record_element: The XML element representing the OAI record. - strip_ns: If True, namespaces are removed from the element names in the parsed metadata. Defaults to True. - - Attributes: - header: An instance of the Header class representing the header information of the record. - deleted: Indicates whether the record is marked as deleted. - metadata: A dictionary representation of the record's metadata, if available and not deleted. - - Raises: - ValueError: If the header element is not found in the provided XML. - """ - - def __init__(self, record_element: etree._Element, strip_ns: bool = True) -> None: - super().__init__(record_element, strip_ns=strip_ns) - header_element = self.xml.find(f".//{self._oai_namespace}header") - if header_element is None: - raise ValueError("Header element not found in the XML.") - self.header = Header(header_element) - self.deleted = self.header.deleted - if not self.deleted: - self.metadata = self.get_metadata() - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> Iterator: - """Iterate over the record's metadata, yielding key-value pairs.""" - return iter(self.metadata.items()) - - def get_metadata(self): - """Extract and return the record's metadata as a dictionary.""" - # We want to get record/metadata//* - # would be the element ``dc`` - # in the ``oai_dc`` case. - return xml_to_dict( - self.xml.find(".//" + self._oai_namespace + "metadata").getchildren()[0], - strip_ns=self._strip_ns, - ) - - -class Set(OAIItem): - """A class representing a set in the OAI-PMH protocol. - - This class encapsulates a set element from an OAI-PMH response and provides structured access to its details. - It parses the set information from the provided XML element and dynamically sets attributes - based on the parsed content. - - Args: - set_element: The XML element representing the OAI set. The element is parsed to extract set details. - - Attributes: - setName: The name of the set, extracted from the set's XML element. - _set_dict: A dictionary containing the parsed set information. - """ - - def __init__(self, set_element: etree._Element) -> None: - super().__init__(set_element, strip_ns=True) - self._set_dict = xml_to_dict(self.xml, strip_ns=True) - self.setName: str | None = None - for k, v in self._set_dict.items(): - setattr(self, k.replace("-", "_"), v[0]) - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> Iterator: - """Iterate over the set information, yielding key-value pairs.""" - return iter(self._set_dict.items()) - - -class MetadataFormat(OAIItem): - """A class representing a metadata format in the OAI-PMH protocol. - - This class handles the representation of a metadata format, which is an essential part of the OAI-PMH protocol. - It parses the provided XML element to extract and store metadata format details such as the metadata prefix. - - Args: - mdf_element: The XML element representing the metadata format. This element is parsed - to extract metadata format details. - - Attributes: - metadataPrefix: The prefix of the metadata format, extracted from the XML element. - _mdf_dict: A dictionary containing the parsed metadata format details. - """ - - def __init__(self, mdf_element: etree._Element) -> None: - super().__init__(mdf_element, strip_ns=True) - self._mdf_dict = xml_to_dict(self.xml, strip_ns=True) - self.metadataPrefix: str | None = None - for k, v in self._mdf_dict.items(): - setattr(self, k.replace("-", "_"), v[0]) - - def __repr__(self) -> str: - return f"" - - def __iter__(self) -> Iterator: - """Iterate over the metadata format information, yielding key-value pairs.""" - return iter(self._mdf_dict.items()) diff --git a/src/oaipmh_scythe/models/.xsdata.xml b/src/oaipmh_scythe/models/.xsdata.xml new file mode 100644 index 0000000..136575c --- /dev/null +++ b/src/oaipmh_scythe/models/.xsdata.xml @@ -0,0 +1,41 @@ + + + + generated + dataclasses + single-package + Google + false + false + false + true + false + false + true + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/oaipmh_scythe/models/__init__.py b/src/oaipmh_scythe/models/__init__.py new file mode 100644 index 0000000..93539f5 --- /dev/null +++ b/src/oaipmh_scythe/models/__init__.py @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: 2024 Heinz-Alexander Fütterer +# +# SPDX-License-Identifier: BSD-3-Clause + +"""TODO.""" + +from oaipmh_scythe.models.oai_pmh import Header, Identify, MetadataFormat, Record, ResumptionToken, Set, Verb + +# `Item` can be used for type annotations +Item = Header | Record | Set | MetadataFormat + +__all__ = [ + "Header", + "Identify", + "MetadataFormat", + "Record", + "ResumptionToken", + "Set", + "Verb", + "Item", +] diff --git a/src/oaipmh_scythe/models/datacite.py b/src/oaipmh_scythe/models/datacite.py new file mode 100644 index 0000000..5dc27b2 --- /dev/null +++ b/src/oaipmh_scythe/models/datacite.py @@ -0,0 +1,1675 @@ +"""This file was generated by xsdata, v24.4, on 2024-04-18 18:56:33 + +Generator: DataclassGenerator +See: https://xsdata.readthedocs.io/ +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +@dataclass(slots=True) +class Affiliation: + """Uniquely identifies an affiliation, according to various identifier schemes.""" + + class Meta: + name = "affiliation" + target_namespace = "http://datacite.org/schema/kernel-4" + + value: str = field( + default="", + metadata={ + "required": True, + "min_length": 1, + }, + ) + affiliation_identifier: None | str = field( + default=None, + metadata={ + "name": "affiliationIdentifier", + "type": "Attribute", + }, + ) + affiliation_identifier_scheme: None | str = field( + default=None, + metadata={ + "name": "affiliationIdentifierScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + + +@dataclass(slots=True) +class Box: + class Meta: + name = "box" + target_namespace = "http://datacite.org/schema/kernel-4" + + west_bound_longitude: None | float = field( + default=None, + metadata={ + "name": "westBoundLongitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -180.0, + "max_inclusive": 180.0, + }, + ) + east_bound_longitude: None | float = field( + default=None, + metadata={ + "name": "eastBoundLongitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -180.0, + "max_inclusive": 180.0, + }, + ) + south_bound_latitude: None | float = field( + default=None, + metadata={ + "name": "southBoundLatitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -90.0, + "max_inclusive": 90.0, + }, + ) + north_bound_latitude: None | float = field( + default=None, + metadata={ + "name": "northBoundLatitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -90.0, + "max_inclusive": 90.0, + }, + ) + + +class ContributorType(Enum): + """The type of contributor of the resource.""" + + CONTACT_PERSON = "ContactPerson" + DATA_COLLECTOR = "DataCollector" + DATA_CURATOR = "DataCurator" + DATA_MANAGER = "DataManager" + DISTRIBUTOR = "Distributor" + EDITOR = "Editor" + HOSTING_INSTITUTION = "HostingInstitution" + OTHER = "Other" + PRODUCER = "Producer" + PROJECT_LEADER = "ProjectLeader" + PROJECT_MANAGER = "ProjectManager" + PROJECT_MEMBER = "ProjectMember" + REGISTRATION_AGENCY = "RegistrationAgency" + REGISTRATION_AUTHORITY = "RegistrationAuthority" + RELATED_PERSON = "RelatedPerson" + RESEARCH_GROUP = "ResearchGroup" + RIGHTS_HOLDER = "RightsHolder" + RESEARCHER = "Researcher" + SPONSOR = "Sponsor" + SUPERVISOR = "Supervisor" + WORK_PACKAGE_LEADER = "WorkPackageLeader" + + +class DateType(Enum): + """The type of date. + + Use RKMS‐ISO8601 standard for depicting date ranges.To indicate the end of an embargo period, use Available. + To indicate the start of an embargo period, use Submitted or Accepted, as appropriate. + """ + + ACCEPTED = "Accepted" + AVAILABLE = "Available" + COLLECTED = "Collected" + COPYRIGHTED = "Copyrighted" + CREATED = "Created" + ISSUED = "Issued" + OTHER = "Other" + SUBMITTED = "Submitted" + UPDATED = "Updated" + VALID = "Valid" + WITHDRAWN = "Withdrawn" + + +class DescriptionType(Enum): + """The type of the description.""" + + ABSTRACT = "Abstract" + METHODS = "Methods" + SERIES_INFORMATION = "SeriesInformation" + TABLE_OF_CONTENTS = "TableOfContents" + TECHNICAL_INFO = "TechnicalInfo" + OTHER = "Other" + + +class FunderIdentifierType(Enum): + """The type of the funderIdentifier.""" + + ISNI = "ISNI" + GRID = "GRID" + ROR = "ROR" + CROSSREF_FUNDER_ID = "Crossref Funder ID" + OTHER = "Other" + + +@dataclass(slots=True) +class NameIdentifier: + """Uniquely identifies a creator or contributor, according to various identifier + schemes. + """ + + class Meta: + name = "nameIdentifier" + target_namespace = "http://datacite.org/schema/kernel-4" + + value: str = field( + default="", + metadata={ + "required": True, + "min_length": 1, + }, + ) + name_identifier_scheme: None | str = field( + default=None, + metadata={ + "name": "nameIdentifierScheme", + "type": "Attribute", + "required": True, + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + + +class NameType(Enum): + ORGANIZATIONAL = "Organizational" + PERSONAL = "Personal" + + +class NumberType(Enum): + ARTICLE = "Article" + CHAPTER = "Chapter" + REPORT = "Report" + OTHER = "Other" + + +@dataclass(slots=True) +class Point: + class Meta: + name = "point" + target_namespace = "http://datacite.org/schema/kernel-4" + + point_longitude: None | float = field( + default=None, + metadata={ + "name": "pointLongitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -180.0, + "max_inclusive": 180.0, + }, + ) + point_latitude: None | float = field( + default=None, + metadata={ + "name": "pointLatitude", + "type": "Element", + "namespace": "http://datacite.org/schema/kernel-4", + "required": True, + "min_inclusive": -90.0, + "max_inclusive": 90.0, + }, + ) + + +class RelatedIdentifierType(Enum): + """The type of the RelatedIdentifier.""" + + ARK = "ARK" + AR_XIV = "arXiv" + BIBCODE = "bibcode" + DOI = "DOI" + EAN13 = "EAN13" + EISSN = "EISSN" + HANDLE = "Handle" + IGSN = "IGSN" + ISBN = "ISBN" + ISSN = "ISSN" + ISTC = "ISTC" + LISSN = "LISSN" + LSID = "LSID" + PMID = "PMID" + PURL = "PURL" + UPC = "UPC" + URL = "URL" + URN = "URN" + W3ID = "w3id" + + +class RelationType(Enum): + """Description of the relationship of the resource being registered (A) and the + related resource (B). + """ + + IS_CITED_BY = "IsCitedBy" + CITES = "Cites" + IS_SUPPLEMENT_TO = "IsSupplementTo" + IS_SUPPLEMENTED_BY = "IsSupplementedBy" + IS_CONTINUED_BY = "IsContinuedBy" + CONTINUES = "Continues" + IS_NEW_VERSION_OF = "IsNewVersionOf" + IS_PREVIOUS_VERSION_OF = "IsPreviousVersionOf" + IS_PART_OF = "IsPartOf" + HAS_PART = "HasPart" + IS_PUBLISHED_IN = "IsPublishedIn" + IS_REFERENCED_BY = "IsReferencedBy" + REFERENCES = "References" + IS_DOCUMENTED_BY = "IsDocumentedBy" + DOCUMENTS = "Documents" + IS_COMPILED_BY = "IsCompiledBy" + COMPILES = "Compiles" + IS_VARIANT_FORM_OF = "IsVariantFormOf" + IS_ORIGINAL_FORM_OF = "IsOriginalFormOf" + IS_IDENTICAL_TO = "IsIdenticalTo" + HAS_METADATA = "HasMetadata" + IS_METADATA_FOR = "IsMetadataFor" + REVIEWS = "Reviews" + IS_REVIEWED_BY = "IsReviewedBy" + IS_DERIVED_FROM = "IsDerivedFrom" + IS_SOURCE_OF = "IsSourceOf" + DESCRIBES = "Describes" + IS_DESCRIBED_BY = "IsDescribedBy" + HAS_VERSION = "HasVersion" + IS_VERSION_OF = "IsVersionOf" + REQUIRES = "Requires" + IS_REQUIRED_BY = "IsRequiredBy" + OBSOLETES = "Obsoletes" + IS_OBSOLETED_BY = "IsObsoletedBy" + COLLECTS = "Collects" + IS_COLLECTED_BY = "IsCollectedBy" + + +class ResourceType(Enum): + """The general type of a resource.""" + + AUDIOVISUAL = "Audiovisual" + BOOK = "Book" + BOOK_CHAPTER = "BookChapter" + COLLECTION = "Collection" + COMPUTATIONAL_NOTEBOOK = "ComputationalNotebook" + CONFERENCE_PAPER = "ConferencePaper" + CONFERENCE_PROCEEDING = "ConferenceProceeding" + DATA_PAPER = "DataPaper" + DATASET = "Dataset" + DISSERTATION = "Dissertation" + EVENT = "Event" + IMAGE = "Image" + INSTRUMENT = "Instrument" + INTERACTIVE_RESOURCE = "InteractiveResource" + JOURNAL = "Journal" + JOURNAL_ARTICLE = "JournalArticle" + MODEL = "Model" + OUTPUT_MANAGEMENT_PLAN = "OutputManagementPlan" + PEER_REVIEW = "PeerReview" + PHYSICAL_OBJECT = "PhysicalObject" + PREPRINT = "Preprint" + REPORT = "Report" + SERVICE = "Service" + SOFTWARE = "Software" + SOUND = "Sound" + STANDARD = "Standard" + STUDY_REGISTRATION = "StudyRegistration" + TEXT = "Text" + WORKFLOW = "Workflow" + OTHER = "Other" + + +class TitleType(Enum): + ALTERNATIVE_TITLE = "AlternativeTitle" + SUBTITLE = "Subtitle" + TRANSLATED_TITLE = "TranslatedTitle" + OTHER = "Other" + + +class LangValue(Enum): + VALUE = "" + + +@dataclass(slots=True) +class Resource: + """Root element of a single record. + + This wrapper element is for XML implementation only and is not defined in the DataCite DOI standard. + Note: This is the case for all wrapper elements within this schema. + No content in this wrapper element. + + Attributes: + identifier: A persistent identifier that identifies a resource. + creators: + titles: + publisher: The name of the entity that holds, archives, publishes prints, distributes, releases, issues, + or produces the resource. This property will be used to formulate the citation, so consider the + prominence of the role. In the case of datasets, "publish" is understood to mean making the data + available to the community of researchers. + publication_year: Year when the data is made publicly available. If an embargo period has been in effect, + use the date when the embargo period ends. In the case of datasets, "publish" is understood to mean + making the data available on a specific date to the community of researchers. If there is no standard + publication year value, use the date that would be preferred from a citation perspective. YYYY + resource_type: The type of a resource. You may enter an additional free text description. The format is + open, but the preferred format is a single term of some detail so that a pair can be formed with the + sub-property. + subjects: + contributors: + dates: + language: Primary language of the resource. Allowed values are taken from IETF BCP 47, ISO 639-1 + language codes. + alternate_identifiers: + related_identifiers: + sizes: + formats: + version: Version number of the resource. If the primary resource has changed the version number + increases. Register a new identifier for a major version change. Individual stewards need to + determine which are major vs. minor versions. May be used in conjunction with properties 11 and 12 + (AlternateIdentifier and RelatedIdentifier) to indicate various information updates. May be used in + conjunction with property 17 (Description) to indicate the nature and file/record range of version. + rights_list: + descriptions: + geo_locations: + funding_references: + related_items: + """ + + class Meta: + name = "resource" + namespace = "http://datacite.org/schema/kernel-4" + + identifier: None | Resource.Identifier = field( + default=None, + metadata={ + "type": "Element", + "required": True, + }, + ) + creators: None | Resource.Creators = field( + default=None, + metadata={ + "type": "Element", + "required": True, + }, + ) + titles: None | Resource.Titles = field( + default=None, + metadata={ + "type": "Element", + "required": True, + }, + ) + publisher: None | Resource.Publisher = field( + default=None, + metadata={ + "type": "Element", + "required": True, + }, + ) + publication_year: None | str = field( + default=None, + metadata={ + "name": "publicationYear", + "type": "Element", + "required": True, + "pattern": r"[\d]{4}", + }, + ) + resource_type: None | Resource.ResourceType = field( + default=None, + metadata={ + "name": "resourceType", + "type": "Element", + "required": True, + }, + ) + subjects: None | Resource.Subjects = field( + default=None, + metadata={ + "type": "Element", + }, + ) + contributors: None | Resource.Contributors = field( + default=None, + metadata={ + "type": "Element", + }, + ) + dates: None | Resource.Dates = field( + default=None, + metadata={ + "type": "Element", + }, + ) + language: None | str = field( + default=None, + metadata={ + "type": "Element", + }, + ) + alternate_identifiers: None | Resource.AlternateIdentifiers = field( + default=None, + metadata={ + "name": "alternateIdentifiers", + "type": "Element", + }, + ) + related_identifiers: None | Resource.RelatedIdentifiers = field( + default=None, + metadata={ + "name": "relatedIdentifiers", + "type": "Element", + }, + ) + sizes: None | Resource.Sizes = field( + default=None, + metadata={ + "type": "Element", + }, + ) + formats: None | Resource.Formats = field( + default=None, + metadata={ + "type": "Element", + }, + ) + version: None | str = field( + default=None, + metadata={ + "type": "Element", + }, + ) + rights_list: None | Resource.RightsList = field( + default=None, + metadata={ + "name": "rightsList", + "type": "Element", + }, + ) + descriptions: None | Resource.Descriptions = field( + default=None, + metadata={ + "type": "Element", + }, + ) + geo_locations: None | Resource.GeoLocations = field( + default=None, + metadata={ + "name": "geoLocations", + "type": "Element", + }, + ) + funding_references: None | Resource.FundingReferences = field( + default=None, + metadata={ + "name": "fundingReferences", + "type": "Element", + }, + ) + related_items: None | Resource.RelatedItems = field( + default=None, + metadata={ + "name": "relatedItems", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Identifier: + value: str = field( + default="", + metadata={ + "required": True, + "min_length": 1, + }, + ) + identifier_type: None | object = field( + default=None, + metadata={ + "name": "identifierType", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class Creators: + """Attributes: + creator: The main researchers involved working on the data, or the authors of the publication in + priority order. May be a corporate/institutional or personal name. Format: Family, Given. + Personal names can be further specified using givenName and familyName. + """ + + creator: list[Resource.Creators.Creator] = field( + default_factory=list, + metadata={ + "type": "Element", + "min_occurs": 1, + }, + ) + + @dataclass(slots=True) + class Creator: + creator_name: None | Resource.Creators.Creator.CreatorName = field( + default=None, + metadata={ + "name": "creatorName", + "type": "Element", + "required": True, + }, + ) + given_name: None | object = field( + default=None, + metadata={ + "name": "givenName", + "type": "Element", + }, + ) + family_name: None | object = field( + default=None, + metadata={ + "name": "familyName", + "type": "Element", + }, + ) + name_identifier: list[object] = field( + default_factory=list, + metadata={ + "name": "nameIdentifier", + "type": "Element", + }, + ) + affiliation: list[object] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class CreatorName: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + name_type: None | NameType = field( + default=None, + metadata={ + "name": "nameType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Titles: + """Attributes: + title: A name or title by which a resource is known. + """ + + title: list[Resource.Titles.Title] = field( + default_factory=list, + metadata={ + "type": "Element", + "min_occurs": 1, + }, + ) + + @dataclass(slots=True) + class Title: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + title_type: None | TitleType = field( + default=None, + metadata={ + "name": "titleType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Publisher: + value: str = field( + default="", + metadata={ + "required": True, + "min_length": 1, + }, + ) + publisher_identifier: None | str = field( + default=None, + metadata={ + "name": "publisherIdentifier", + "type": "Attribute", + }, + ) + publisher_identifier_scheme: None | str = field( + default=None, + metadata={ + "name": "publisherIdentifierScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class ResourceType: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + resource_type_general: None | ResourceType = field( + default=None, + metadata={ + "name": "resourceTypeGeneral", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class Subjects: + """Attributes: + subject: Subject, keywords, classification codes, or key phrases describing the resource. + """ + + subject: list[Resource.Subjects.Subject] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Subject: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + subject_scheme: None | object = field( + default=None, + metadata={ + "name": "subjectScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + value_uri: None | str = field( + default=None, + metadata={ + "name": "valueURI", + "type": "Attribute", + }, + ) + classification_code: None | str = field( + default=None, + metadata={ + "name": "classificationCode", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Contributors: + """Attributes: + contributor: The institution or person responsible for collecting, creating, or otherwise + contributing to the development of the dataset. The personal name format should be: Family, + Given. + """ + + contributor: list[Resource.Contributors.Contributor] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Contributor: + contributor_name: None | Resource.Contributors.Contributor.ContributorName = field( + default=None, + metadata={ + "name": "contributorName", + "type": "Element", + "required": True, + }, + ) + given_name: None | object = field( + default=None, + metadata={ + "name": "givenName", + "type": "Element", + }, + ) + family_name: None | object = field( + default=None, + metadata={ + "name": "familyName", + "type": "Element", + }, + ) + name_identifier: list[object] = field( + default_factory=list, + metadata={ + "name": "nameIdentifier", + "type": "Element", + }, + ) + affiliation: list[object] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + contributor_type: None | ContributorType = field( + default=None, + metadata={ + "name": "contributorType", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class ContributorName: + value: str = field( + default="", + metadata={ + "required": True, + "min_length": 1, + }, + ) + name_type: None | NameType = field( + default=None, + metadata={ + "name": "nameType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Dates: + """Attributes: + date: Different dates relevant to the work. YYYY,YYYY-MM-DD, YYYY-MM-DDThh:mm:ssTZD or any other + format or level of granularity described in W3CDTF. Use RKMS-ISO8601 standard for depicting date + ranges. + """ + + date: list[Resource.Dates.Date] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Date: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + date_type: None | DateType = field( + default=None, + metadata={ + "name": "dateType", + "type": "Attribute", + "required": True, + }, + ) + date_information: None | object = field( + default=None, + metadata={ + "name": "dateInformation", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class AlternateIdentifiers: + """Attributes: + alternate_identifier: An identifier or identifiers other than the primary Identifier applied to the + resource being registered. This may be any alphanumeric string which is unique within its domain + of issue. May be used for local identifiers. AlternateIdentifier should be used for another + identifier of the same instance (same location, same file). + """ + + alternate_identifier: list[Resource.AlternateIdentifiers.AlternateIdentifier] = field( + default_factory=list, + metadata={ + "name": "alternateIdentifier", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class AlternateIdentifier: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + alternate_identifier_type: None | object = field( + default=None, + metadata={ + "name": "alternateIdentifierType", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class RelatedIdentifiers: + """Attributes: + related_identifier: Identifiers of related resources. Use this property to indicate subsets of + properties, as appropriate. + """ + + related_identifier: list[Resource.RelatedIdentifiers.RelatedIdentifier] = field( + default_factory=list, + metadata={ + "name": "relatedIdentifier", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class RelatedIdentifier: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + resource_type_general: None | ResourceType = field( + default=None, + metadata={ + "name": "resourceTypeGeneral", + "type": "Attribute", + }, + ) + related_identifier_type: None | RelatedIdentifierType = field( + default=None, + metadata={ + "name": "relatedIdentifierType", + "type": "Attribute", + "required": True, + }, + ) + relation_type: None | RelationType = field( + default=None, + metadata={ + "name": "relationType", + "type": "Attribute", + "required": True, + }, + ) + related_metadata_scheme: None | object = field( + default=None, + metadata={ + "name": "relatedMetadataScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + scheme_type: None | object = field( + default=None, + metadata={ + "name": "schemeType", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class Sizes: + """Attributes: + size: Unstructures size information about the resource. + """ + + size: list[str] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Formats: + """Attributes: + format: Technical format of the resource. Use file extension or MIME type where possible. + """ + + format: list[str] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class RightsList: + """Attributes: + rights: Any rights information for this resource. Provide a rights management statement for the + resource or reference a service providing such information. Include embargo information if + applicable. Use the complete title of a license and include version information if applicable. + """ + + rights: list[Resource.RightsList.Rights] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Rights: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + rights_uri: None | str = field( + default=None, + metadata={ + "name": "rightsURI", + "type": "Attribute", + }, + ) + rights_identifier: None | object = field( + default=None, + metadata={ + "name": "rightsIdentifier", + "type": "Attribute", + }, + ) + rights_identifier_scheme: None | object = field( + default=None, + metadata={ + "name": "rightsIdentifierScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Descriptions: + """Attributes: + description: All additional information that does not fit in any of the other categories. May be used + for technical information. It is a best practice to supply a description. + """ + + description: list[Resource.Descriptions.Description] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Description: + description_type: None | DescriptionType = field( + default=None, + metadata={ + "name": "descriptionType", + "type": "Attribute", + "required": True, + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + content: list[object] = field( + default_factory=list, + metadata={ + "type": "Wildcard", + "namespace": "##any", + "mixed": True, + "choices": ( + { + "name": "br", + "type": object, + }, + ), + }, + ) + + @dataclass(slots=True) + class GeoLocations: + geo_location: list[Resource.GeoLocations.GeoLocation] = field( + default_factory=list, + metadata={ + "name": "geoLocation", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class GeoLocation: + """Attributes: + geo_location_place: Spatial region or named place where the data was gathered or about which the + data is focused. + geo_location_point: A point contains a single latitude-longitude pair. + geo_location_box: A box contains two white space separated latitude-longitude pairs, with each + pair separated by whitespace. The first pair is the lower corner, the second is the upper + corner. + geo_location_polygon: A drawn polygon area, defined by a set of points and lines connecting the + points in a closed chain. + """ + + geo_location_place: list[object] = field( + default_factory=list, + metadata={ + "name": "geoLocationPlace", + "type": "Element", + }, + ) + geo_location_point: list[Point] = field( + default_factory=list, + metadata={ + "name": "geoLocationPoint", + "type": "Element", + }, + ) + geo_location_box: list[Box] = field( + default_factory=list, + metadata={ + "name": "geoLocationBox", + "type": "Element", + }, + ) + geo_location_polygon: list[Resource.GeoLocations.GeoLocation.GeoLocationPolygon] = field( + default_factory=list, + metadata={ + "name": "geoLocationPolygon", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class GeoLocationPolygon: + polygon_point: list[Point] = field( + default_factory=list, + metadata={ + "name": "polygonPoint", + "type": "Element", + "min_occurs": 4, + }, + ) + in_polygon_point: None | Point = field( + default=None, + metadata={ + "name": "inPolygonPoint", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class FundingReferences: + """Attributes: + funding_reference: Information about financial support (funding) for the resource being registered. + """ + + funding_reference: list[Resource.FundingReferences.FundingReference] = field( + default_factory=list, + metadata={ + "name": "fundingReference", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class FundingReference: + """Attributes: + funder_name: Name of the funding provider. + funder_identifier: Uniquely identifies a funding entity, according to various types. + award_number: The code assigned by the funder to a sponsored award (grant). + award_title: The human readable title of the award (grant). + """ + + funder_name: None | str = field( + default=None, + metadata={ + "name": "funderName", + "type": "Element", + "required": True, + "min_length": 1, + }, + ) + funder_identifier: None | Resource.FundingReferences.FundingReference.FunderIdentifier = field( + default=None, + metadata={ + "name": "funderIdentifier", + "type": "Element", + }, + ) + award_number: None | Resource.FundingReferences.FundingReference.AwardNumber = field( + default=None, + metadata={ + "name": "awardNumber", + "type": "Element", + }, + ) + award_title: None | object = field( + default=None, + metadata={ + "name": "awardTitle", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class FunderIdentifier: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + funder_identifier_type: None | FunderIdentifierType = field( + default=None, + metadata={ + "name": "funderIdentifierType", + "type": "Attribute", + "required": True, + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class AwardNumber: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + award_uri: None | str = field( + default=None, + metadata={ + "name": "awardURI", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class RelatedItems: + """Attributes: + related_item: Information about a resource related to the one being registered e.g. a journal or book + of which the article or chapter is part. + """ + + related_item: list[Resource.RelatedItems.RelatedItem] = field( + default_factory=list, + metadata={ + "name": "relatedItem", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class RelatedItem: + """Attributes: + related_item_identifier: The identifier for the related item. + creators: + titles: + publication_year: The year when the item was or will be made publicly available. + volume: Volume of the related item. + issue: Issue number or name of the related item. + number: Number of the related item e.g. report number of article number. + first_page: First page of the related item e.g. of the chapter, article, or conference paper. + last_page: Last page of the related item e.g. of the chapter, article, or conference paper. + publisher: The name of the entity that holds, archives, publishes prints, distributes, releases, + issues, or produces the resource. This property will be used to formulate the citation, so + consider the prominence of the role. + edition: Edition or version of the related item. + contributors: + related_item_type: The type of the related item, e.g. journal article, book or chapter. + relation_type: Description of the relationship of the resource being registered (A) and the + related resource (B). + """ + + related_item_identifier: None | Resource.RelatedItems.RelatedItem.RelatedItemIdentifier = field( + default=None, + metadata={ + "name": "relatedItemIdentifier", + "type": "Element", + }, + ) + creators: None | Resource.RelatedItems.RelatedItem.Creators = field( + default=None, + metadata={ + "type": "Element", + }, + ) + titles: None | Resource.RelatedItems.RelatedItem.Titles = field( + default=None, + metadata={ + "type": "Element", + }, + ) + publication_year: None | str = field( + default=None, + metadata={ + "name": "publicationYear", + "type": "Element", + "pattern": r"[\d]{4}", + }, + ) + volume: None | object = field( + default=None, + metadata={ + "type": "Element", + }, + ) + issue: None | object = field( + default=None, + metadata={ + "type": "Element", + }, + ) + number: None | Resource.RelatedItems.RelatedItem.Number = field( + default=None, + metadata={ + "type": "Element", + }, + ) + first_page: None | object = field( + default=None, + metadata={ + "name": "firstPage", + "type": "Element", + }, + ) + last_page: None | object = field( + default=None, + metadata={ + "name": "lastPage", + "type": "Element", + }, + ) + publisher: None | object = field( + default=None, + metadata={ + "type": "Element", + }, + ) + edition: None | object = field( + default=None, + metadata={ + "type": "Element", + }, + ) + contributors: None | Resource.RelatedItems.RelatedItem.Contributors = field( + default=None, + metadata={ + "type": "Element", + }, + ) + related_item_type: None | ResourceType = field( + default=None, + metadata={ + "name": "relatedItemType", + "type": "Attribute", + "required": True, + }, + ) + relation_type: None | RelationType = field( + default=None, + metadata={ + "name": "relationType", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class RelatedItemIdentifier: + """Attributes: + value: + related_item_identifier_type: The type of the Identifier for the related item e.g. DOI. + related_metadata_scheme: The name of the scheme. + scheme_uri: The URI of the relatedMetadataScheme. + scheme_type: The type of the relatedMetadataScheme, linked with the schemeURI. + """ + + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + related_item_identifier_type: None | RelatedIdentifierType = field( + default=None, + metadata={ + "name": "relatedItemIdentifierType", + "type": "Attribute", + }, + ) + related_metadata_scheme: None | object = field( + default=None, + metadata={ + "name": "relatedMetadataScheme", + "type": "Attribute", + }, + ) + scheme_uri: None | str = field( + default=None, + metadata={ + "name": "schemeURI", + "type": "Attribute", + }, + ) + scheme_type: None | object = field( + default=None, + metadata={ + "name": "schemeType", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class Creators: + """Attributes: + creator: The institution or person responsible for creating the related resource. To supply + multiple creators, repeat this property. + """ + + creator: list[Resource.RelatedItems.RelatedItem.Creators.Creator] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Creator: + creator_name: None | Resource.RelatedItems.RelatedItem.Creators.Creator.CreatorName = field( + default=None, + metadata={ + "name": "creatorName", + "type": "Element", + "required": True, + }, + ) + given_name: None | object = field( + default=None, + metadata={ + "name": "givenName", + "type": "Element", + }, + ) + family_name: None | object = field( + default=None, + metadata={ + "name": "familyName", + "type": "Element", + }, + ) + + @dataclass(slots=True) + class CreatorName: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + name_type: None | NameType = field( + default=None, + metadata={ + "name": "nameType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Titles: + """Attributes: + title: Title of the related item. + """ + + title: list[Resource.RelatedItems.RelatedItem.Titles.Title] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Title: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + title_type: None | TitleType = field( + default=None, + metadata={ + "name": "titleType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + @dataclass(slots=True) + class Number: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + number_type: None | NumberType = field( + default=None, + metadata={ + "name": "numberType", + "type": "Attribute", + }, + ) + + @dataclass(slots=True) + class Contributors: + """Attributes: + contributor: The institution or person responsible for collecting, managing, distributing, or + otherwise contributing to the development of the resource. + """ + + contributor: list[Resource.RelatedItems.RelatedItem.Contributors.Contributor] = field( + default_factory=list, + metadata={ + "type": "Element", + }, + ) + + @dataclass(slots=True) + class Contributor: + """Attributes: + contributor_name: + given_name: + family_name: + contributor_type: The type of contributor of the resource. + """ + + contributor_name: ( + None | Resource.RelatedItems.RelatedItem.Contributors.Contributor.ContributorName + ) = field( + default=None, + metadata={ + "name": "contributorName", + "type": "Element", + "required": True, + }, + ) + given_name: None | object = field( + default=None, + metadata={ + "name": "givenName", + "type": "Element", + }, + ) + family_name: None | object = field( + default=None, + metadata={ + "name": "familyName", + "type": "Element", + }, + ) + contributor_type: None | ContributorType = field( + default=None, + metadata={ + "name": "contributorType", + "type": "Attribute", + "required": True, + }, + ) + + @dataclass(slots=True) + class ContributorName: + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + name_type: None | NameType = field( + default=None, + metadata={ + "name": "nameType", + "type": "Attribute", + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) diff --git a/src/oaipmh_scythe/models/mixins.py b/src/oaipmh_scythe/models/mixins.py new file mode 100644 index 0000000..6bbd720 --- /dev/null +++ b/src/oaipmh_scythe/models/mixins.py @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2024 Heinz-Alexander Fütterer +# +# SPDX-License-Identifier: BSD-3-Clause + +"""TODO.""" + +from typing import Any + + +class HeaderMixin: + """A mixin class that provides functionality for managing headers in records. + + Attributes: + status: The status attribute of the header. + """ + + status: Any + + @property + def deleted(self) -> bool: + """Indicate if this header has been deleted. + + Returns: + True if the status attribute contains DELETED, False otherwise. + """ + if self.status and self.status.DELETED: + return True + return False + + +class RecordMixin: + """A mixin class that provides functionality for managing records. + + Attributes: + header: The header of the record. + metadata: The metadata associated with the record. + """ + + header: Any + metadata: Any + + @property + def deleted(self) -> bool: + """Indicate if this record has been deleted. + + Returns: + True if the header's status attribute contains DELETED, False otherwise. + """ + if self.header.status and self.header.status.DELETED: + return True + return False + + def get_metadata(self): + """Return the metadata associated with this record. + + Returns: + The metadata associated with this record. + """ + return self.metadata.other_element diff --git a/src/oaipmh_scythe/models/oai_dc.py b/src/oaipmh_scythe/models/oai_dc.py new file mode 100644 index 0000000..7f137dd --- /dev/null +++ b/src/oaipmh_scythe/models/oai_dc.py @@ -0,0 +1,261 @@ +"""This file was generated by xsdata, v24.4, on 2024-04-04 16:40:52 + +Generator: DataclassGenerator +See: https://xsdata.readthedocs.io/ +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +class LangValue(Enum): + VALUE = "" + + +@dataclass(slots=True) +class ElementType: + class Meta: + name = "elementType" + target_namespace = "http://purl.org/dc/elements/1.1/" + + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + lang: None | str | LangValue = field( + default=None, + metadata={ + "type": "Attribute", + "namespace": "http://www.w3.org/XML/1998/namespace", + }, + ) + + +@dataclass(slots=True) +class Contributor(ElementType): + class Meta: + name = "contributor" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Coverage(ElementType): + class Meta: + name = "coverage" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Creator(ElementType): + class Meta: + name = "creator" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Date(ElementType): + class Meta: + name = "date" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Description(ElementType): + class Meta: + name = "description" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Format(ElementType): + class Meta: + name = "format" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Identifier(ElementType): + class Meta: + name = "identifier" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Language(ElementType): + class Meta: + name = "language" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Publisher(ElementType): + class Meta: + name = "publisher" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Relation(ElementType): + class Meta: + name = "relation" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Rights(ElementType): + class Meta: + name = "rights" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Source(ElementType): + class Meta: + name = "source" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Subject(ElementType): + class Meta: + name = "subject" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class Title(ElementType): + class Meta: + name = "title" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class TypeType(ElementType): + class Meta: + name = "type" + namespace = "http://purl.org/dc/elements/1.1/" + + +@dataclass(slots=True) +class OaiDcType: + class Meta: + name = "oai_dcType" + target_namespace = "http://www.openarchives.org/OAI/2.0/oai_dc/" + + title: list[Title] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + creator: list[Creator] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + subject: list[Subject] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + description: list[Description] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + publisher: list[Publisher] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + contributor: list[Contributor] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + date: list[Date] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + type_value: list[TypeType] = field( + default_factory=list, + metadata={ + "name": "type", + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + format: list[Format] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + identifier: list[Identifier] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + source: list[Source] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + language: list[Language] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + relation: list[Relation] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + coverage: list[Coverage] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + rights: list[Rights] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://purl.org/dc/elements/1.1/", + }, + ) + + +@dataclass(slots=True) +class Dc(OaiDcType): + class Meta: + name = "dc" + namespace = "http://www.openarchives.org/OAI/2.0/oai_dc/" diff --git a/src/oaipmh_scythe/models/oai_pmh.py b/src/oaipmh_scythe/models/oai_pmh.py new file mode 100644 index 0000000..67cb218 --- /dev/null +++ b/src/oaipmh_scythe/models/oai_pmh.py @@ -0,0 +1,648 @@ +"""This file was generated by xsdata, v24.4, on 2024-04-16 13:28:57 + +Generator: DataclassGenerator +See: https://xsdata.readthedocs.io/ +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + +from xsdata.models.datatype import XmlDate, XmlDateTime + +from oaipmh_scythe.models.mixins import HeaderMixin, RecordMixin + +__NAMESPACE__ = "http://www.openarchives.org/OAI/2.0/" + + +class OaiPmherrorcode(Enum): + CANNOT_DISSEMINATE_FORMAT = "cannotDisseminateFormat" + ID_DOES_NOT_EXIST = "idDoesNotExist" + BAD_ARGUMENT = "badArgument" + BAD_VERB = "badVerb" + NO_METADATA_FORMATS = "noMetadataFormats" + NO_RECORDS_MATCH = "noRecordsMatch" + BAD_RESUMPTION_TOKEN = "badResumptionToken" + NO_SET_HIERARCHY = "noSetHierarchy" + + +@dataclass(slots=True) +class About: + """Data "about" the record must be expressed in XML that is compliant with an XML Schema defined by a community.""" + + class Meta: + name = "aboutType" + + other_element: None | object = field( + default=None, + metadata={ + "type": "Wildcard", + "namespace": "##other", + }, + ) + + +class DeletedRecord(Enum): + NO = "no" + PERSISTENT = "persistent" + TRANSIENT = "transient" + + +@dataclass(slots=True) +class Description: + """The descriptionType is used for the description element in Identify and for setDescription element in ListSets. + + Content must be compliant with an XML Schema defined by a community. + """ + + class Meta: + name = "descriptionType" + + other_element: None | object = field( + default=None, + metadata={ + "type": "Wildcard", + "namespace": "##other", + }, + ) + + +class Granularity(Enum): + YYYY_MM_DD = "YYYY-MM-DD" + YYYY_MM_DDTHH_MM_SS_Z = "YYYY-MM-DDThh:mm:ssZ" + + +@dataclass(slots=True) +class MetadataFormat: + class Meta: + name = "metadataFormatType" + + metadata_prefix: None | str = field( + default=None, + metadata={ + "name": "metadataPrefix", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + "pattern": r"[A-Za-z0-9\-_\.!~\*'\(\)]+", + }, + ) + schema: None | str = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + metadata_namespace: None | str = field( + default=None, + metadata={ + "name": "metadataNamespace", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + + +@dataclass(slots=True) +class Metadata: + """Metadata must be expressed in XML that complies with another XML Schema (namespace=#other). + + Metadata must be explicitly qualified in the response. + """ + + class Meta: + name = "metadataType" + + other_element: None | object = field( + default=None, + metadata={ + "type": "Wildcard", + "namespace": "##other", + }, + ) + + +class ProtocolVersion(Enum): + VALUE_2_0 = "2.0" + + +@dataclass(slots=True) +class ResumptionToken: + """A resumptionToken may have 3 optional attributes and can be used in ListSets, ListIdentifiers, ListRecords responses.""" + + class Meta: + name = "resumptionTokenType" + + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + expiration_date: None | XmlDateTime = field( + default=None, + metadata={ + "name": "expirationDate", + "type": "Attribute", + }, + ) + complete_list_size: None | int = field( + default=None, + metadata={ + "name": "completeListSize", + "type": "Attribute", + }, + ) + cursor: None | int = field( + default=None, + metadata={ + "type": "Attribute", + }, + ) + + +class Status(Enum): + DELETED = "deleted" + + +class Verb(Enum): + IDENTIFY = "Identify" + LIST_METADATA_FORMATS = "ListMetadataFormats" + LIST_SETS = "ListSets" + GET_RECORD = "GetRecord" + LIST_IDENTIFIERS = "ListIdentifiers" + LIST_RECORDS = "ListRecords" + + +@dataclass(slots=True) +class Identify: + class Meta: + name = "IdentifyType" + + repository_name: None | str = field( + default=None, + metadata={ + "name": "repositoryName", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + base_url: None | str = field( + default=None, + metadata={ + "name": "baseURL", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + protocol_version: None | ProtocolVersion = field( + default=None, + metadata={ + "name": "protocolVersion", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + admin_email: list[str] = field( + default_factory=list, + metadata={ + "name": "adminEmail", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "min_occurs": 1, + "pattern": r"\S+@(\S+\.)+\S+", + }, + ) + earliest_datestamp: None | XmlDate | str = field( + default=None, + metadata={ + "name": "earliestDatestamp", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + "pattern": r".*Z", + }, + ) + deleted_record: None | DeletedRecord = field( + default=None, + metadata={ + "name": "deletedRecord", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + granularity: None | Granularity = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + compression: list[str] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + description: list[Description] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class ListMetadataFormats: + class Meta: + name = "ListMetadataFormatsType" + + metadata_format: list[MetadataFormat] = field( + default_factory=list, + metadata={ + "name": "metadataFormat", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "min_occurs": 1, + }, + ) + + +@dataclass(slots=True) +class OaiPmherror: + class Meta: + name = "OAI-PMHerrorType" + + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + code: None | OaiPmherrorcode = field( + default=None, + metadata={ + "type": "Attribute", + "required": True, + }, + ) + + +@dataclass(slots=True) +class Header(HeaderMixin): + """A header has a unique identifier, a datestamp, and setSpec(s) in case the + item from which the record is disseminated belongs to set(s). + + the header can carry a deleted status indicating that the record is deleted. + """ + + class Meta: + name = "headerType" + + identifier: None | str = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + datestamp: None | XmlDate | str = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + "pattern": r".*Z", + }, + ) + set_spec: list[str] = field( + default_factory=list, + metadata={ + "name": "setSpec", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "pattern": r"([A-Za-z0-9\-_\.!~\*'\(\)])+(:[A-Za-z0-9\-_\.!~\*'\(\)]+)*", + }, + ) + status: None | Status = field( + default=None, + metadata={ + "type": "Attribute", + }, + ) + + +@dataclass(slots=True) +class Request: + """Define requestType, indicating the protocol request that led to the + response. + + Element content is BASE-URL, attributes are arguments of protocol request, attribute-values are values of + arguments of protocol request + """ + + class Meta: + name = "requestType" + + value: str = field( + default="", + metadata={ + "required": True, + }, + ) + verb: None | Verb = field( + default=None, + metadata={ + "type": "Attribute", + }, + ) + identifier: None | str = field( + default=None, + metadata={ + "type": "Attribute", + }, + ) + metadata_prefix: None | str = field( + default=None, + metadata={ + "name": "metadataPrefix", + "type": "Attribute", + "pattern": r"[A-Za-z0-9\-_\.!~\*'\(\)]+", + }, + ) + from_value: None | XmlDate | str = field( + default=None, + metadata={ + "name": "from", + "type": "Attribute", + "pattern": r".*Z", + }, + ) + until: None | XmlDate | str = field( + default=None, + metadata={ + "type": "Attribute", + "pattern": r".*Z", + }, + ) + set: None | str = field( + default=None, + metadata={ + "type": "Attribute", + "pattern": r"([A-Za-z0-9\-_\.!~\*'\(\)])+(:[A-Za-z0-9\-_\.!~\*'\(\)]+)*", + }, + ) + resumption_token: None | str = field( + default=None, + metadata={ + "name": "resumptionToken", + "type": "Attribute", + }, + ) + + +@dataclass(slots=True) +class Set: + class Meta: + name = "setType" + + set_spec: None | str = field( + default=None, + metadata={ + "name": "setSpec", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + "pattern": r"([A-Za-z0-9\-_\.!~\*'\(\)])+(:[A-Za-z0-9\-_\.!~\*'\(\)]+)*", + }, + ) + set_name: None | str = field( + default=None, + metadata={ + "name": "setName", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + set_description: list[Description] = field( + default_factory=list, + metadata={ + "name": "setDescription", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class ListIdentifiers: + class Meta: + name = "ListIdentifiersType" + + header: list[Header] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "min_occurs": 1, + }, + ) + resumption_token: None | ResumptionToken = field( + default=None, + metadata={ + "name": "resumptionToken", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class ListSets: + class Meta: + name = "ListSetsType" + + set: list[Set] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "min_occurs": 1, + }, + ) + resumption_token: None | ResumptionToken = field( + default=None, + metadata={ + "name": "resumptionToken", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class Record(RecordMixin): + """A record has a header, a metadata part, and an optional about container.""" + + class Meta: + name = "recordType" + + header: None | Header = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + metadata: None | Metadata = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + about: list[About] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class GetRecord: + class Meta: + name = "GetRecordType" + + record: None | Record = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + + +@dataclass(slots=True) +class ListRecords: + class Meta: + name = "ListRecordsType" + + record: list[Record] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "min_occurs": 1, + }, + ) + resumption_token: None | ResumptionToken = field( + default=None, + metadata={ + "name": "resumptionToken", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class OaiPmhtype: + class Meta: + name = "OAI-PMHtype" + + response_date: None | XmlDateTime = field( + default=None, + metadata={ + "name": "responseDate", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + request: None | Request = field( + default=None, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + "required": True, + }, + ) + error: list[OaiPmherror] = field( + default_factory=list, + metadata={ + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + identify: None | Identify = field( + default=None, + metadata={ + "name": "Identify", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + list_metadata_formats: None | ListMetadataFormats = field( + default=None, + metadata={ + "name": "ListMetadataFormats", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + list_sets: None | ListSets = field( + default=None, + metadata={ + "name": "ListSets", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + get_record: None | GetRecord = field( + default=None, + metadata={ + "name": "GetRecord", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + list_identifiers: None | ListIdentifiers = field( + default=None, + metadata={ + "name": "ListIdentifiers", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + list_records: None | ListRecords = field( + default=None, + metadata={ + "name": "ListRecords", + "type": "Element", + "namespace": "http://www.openarchives.org/OAI/2.0/", + }, + ) + + +@dataclass(slots=True) +class OaiPmh(OaiPmhtype): + class Meta: + name = "OAI-PMH" + namespace = "http://www.openarchives.org/OAI/2.0/" diff --git a/src/oaipmh_scythe/response.py b/src/oaipmh_scythe/response.py index 0d49f41..765317e 100644 --- a/src/oaipmh_scythe/response.py +++ b/src/oaipmh_scythe/response.py @@ -15,40 +15,123 @@ from dataclasses import dataclass from typing import TYPE_CHECKING -from lxml import etree +import httpx +from xsdata.formats.dataclass.context import XmlContext +from xsdata.formats.dataclass.parsers import XmlParser + +from oaipmh_scythe import exceptions +from oaipmh_scythe.models.oai_pmh import OaiPmh +from oaipmh_scythe.utils import load_models if TYPE_CHECKING: - from httpx import Response + from oaipmh_scythe.models.oai_pmh import OaiPmherror -XMLParser = etree.XMLParser(remove_blank_text=True, recover=True, resolve_entities=False) +CONTEXT = XmlContext() +PARSER = XmlParser(context=CONTEXT) -@dataclass -class OAIResponse: - """Represents a response received from an OAI server, encapsulating the raw HTTP response and parsed XML content. +def _build_response(http_response: httpx.Response, metadata_prefix: str) -> Response: + """Build a response object from an HTTP response. - This class provides a structured way to access various aspects of an OAI server's response. - It offers methods to retrieve the raw text of the response, parse it as XML, - and obtain a string representation of the response that includes the OAI verb. + This function is used to construct a response object from an HTTP response. It checks if the server returned + an error status code and raises an exception if so. Otherwise, it parses the response content using + `_parse_response` and returns a Response object with the parsed data. - Attributes: - http_response: The original HTTP response object from the OAI server. - params: A dictionary of the OAI parameters used in the request that led to this response. + Args: + http_response: The HTTP response to build a response from. + metadata_prefix: The metadata format used in the request. + + Returns: + A built response object. + + Raises: + httpx.HTTPError: If the server returned an error status code >= 500. """ + if http_response.is_server_error: + http_response.raise_for_status() + parsed = _parse_response(http_response.content, metadata_prefix) + return Response( + status_code=httpx.codes(http_response.status_code), + content=http_response.content, + headers=http_response.headers, + parsed=parsed, + ) + - http_response: Response - params: dict[str, str] +def _parse_response(content: bytes, metadata_prefix: str) -> OaiPmh: + """Parse an HTTP response content into an OAI-PMH object. + + This function uses the xsdata XmlParser to convert the HTTP response content into an OAI-PMH object. It first loads + any necessary models, then parse the content using the parser. If there are errors in the XML response, + it raises the appropriate exception. + + Args: + content: The HTTP response content to parse. + metadata_prefix: The metadata format used in the request. + + Returns: + The parsed OAI-PMH object. + + Raises: + exceptions.OAIPMHException: If there is an error sent from the server in the response content. + """ + load_models(metadata_prefix) + parsed = PARSER.from_bytes(content, OaiPmh) + raise_for_error(parsed.error) + return parsed - @property - def raw(self) -> str: - """Return the raw text of the server's response as a unicode string.""" - return self.http_response.text - @property - def xml(self) -> etree._Element: - """Parse the server's response content and return it as an `etree._Element` object.""" - return etree.XML(self.http_response.content, parser=XMLParser) +def raise_for_error(errors: list[OaiPmherror] | None) -> None: + """Raise an exception for each error in the given list. + + Args: + errors: A list of OAI-PMH errors to raise exceptions for. If None, no exceptions are raised. + + Returns: + None. + + Raises: + exceptions.OAIPMHException: If the error list is empty or contains unknown error codes, the appropriate + exception is raised. Specific exceptions are raised for each known error code. + """ + if errors is None: + return + for error in errors: + if error.code: + match error.code: + case error.code.BAD_ARGUMENT: + raise exceptions.BadArgument(error.value) + case error.code.BAD_RESUMPTION_TOKEN: + raise exceptions.BadResumptionToken(error.value) + case error.code.BAD_VERB: + raise exceptions.BadVerb(error.value) + case error.code.CANNOT_DISSEMINATE_FORMAT: + raise exceptions.CannotDisseminateFormat(error.value) + case error.code.ID_DOES_NOT_EXIST: + raise exceptions.IdDoesNotExist(error.value) + case error.code.NO_METADATA_FORMATS: + raise exceptions.NoMetadataFormat(error.value) + case error.code.NO_RECORDS_MATCH: + raise exceptions.NoRecordsMatch(error.value) + case error.code.NO_SET_HIERARCHY: + raise exceptions.NoSetHierarchy(error.value) + case _: + raise exceptions.GeneralOAIPMHError(error.value) + raise exceptions.GeneralOAIPMHError(error) + + +@dataclass +class Response: + """A response received from an OAI server, encapsulating the raw HTTP response and parsed content. + + Attributes: + status_code: The HTTP status code of the response. + headers: A dictionary-like object containing metadata about the response, such as content type and length. + content: The raw bytes of the response content. + parsed: The parsed OAI-PMH object representing the OAI-PMH metadata in the response. + """ - def __str__(self) -> str: - verb = self.params.get("verb") - return f"" + status_code: httpx.codes + headers: httpx.Headers + content: bytes + parsed: OaiPmh diff --git a/src/oaipmh_scythe/utils.py b/src/oaipmh_scythe/utils.py index e07a157..e75bf00 100644 --- a/src/oaipmh_scythe/utils.py +++ b/src/oaipmh_scythe/utils.py @@ -13,22 +13,19 @@ log_response: Log the details of an HTTP response. remove_none_values: Remove keys from the dictionary where the value is `None`. filter_dict_except_resumption_token: Filter keys from the dictionary, if resumption token is not `None`. - get_namespace: Extracts the namespace from an XML element. - xml_to_dict: Converts an XML tree or element into a dictionary representation. """ from __future__ import annotations import logging -import re -from collections import defaultdict +from enum import Enum from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Any import httpx - from lxml import etree + logger = logging.getLogger(__name__) @@ -70,10 +67,10 @@ def filter_dict_except_resumption_token(d: dict[str, Any | None]) -> dict[str, A with None values. Args: - d (dict[str, Any | None]): The dictionary to filter. + d: The dictionary to filter. Returns: - dict[str, Any]: A filtered dictionary based on the defined criteria. + A filtered dictionary based on the defined criteria. """ allowed_keys = ("verb", "resumptionToken") resumption_token_present = d["resumptionToken"] is not None @@ -86,54 +83,40 @@ def filter_dict_except_resumption_token(d: dict[str, Any | None]) -> dict[str, A return d -def get_namespace(element: etree._Element) -> str | None: - """Return the namespace URI of an XML element. - - Extracts and returns the namespace URI from the tag of the given XML element. - The namespace URI is enclosed in curly braces at the start of the tag. - If the element does not have a namespace, `None` is returned. - - Args: - element: The XML element from which to extract the namespace. - - Returns: - The namespace URI as a string if the element has a namespace, otherwise `None`. - """ - match = re.search(r"(\{.*\})", element.tag) - return match.group(1) if match else None - +def load_models(metadata_prefix: str | None = None) -> None: + """Load models based on the provided metadata prefix. -def xml_to_dict( - tree: etree._Element, paths: list[str] | None = None, nsmap: dict[str, str] | None = None, strip_ns: bool = False -) -> dict[str, list[str | None]]: - """Convert an XML tree to a dictionary, with options for custom XPath and namespace handling. - - This function takes an XML element tree and converts it into a dictionary. The keys of the - dictionary are the tags of the XML elements, and the values are lists of the text contents - of these elements. It offers options to apply specific XPath expressions, handle namespaces, - and optionally strip namespaces from the tags in the resulting dictionary. + After loading these models, they are available to the xsdata XmlParser for parsing XML responses into the + appropriate dataclasses. Args: - tree: The root element of the XML tree to be converted. - paths: An optional list of XPath expressions to apply on the XML tree. If None or not - provided, the function will consider all elements in the tree. - nsmap: An optional dictionary for namespace mapping, used to provide shorter, more - readable paths in XPath expressions. If None or not provided, no namespace - mapping is applied. - strip_ns: A boolean flag indicating whether to remove namespaces from the element tags - in the resulting dictionary. Defaults to False. + metadata_prefix: The metadata format of the response to be parsed. Possible values are 'oai_dc' and 'datacite'. Returns: - A dictionary where each key is an element tag (with or without namespace, based on - `strip_ns`) and each value is a list of strings representing the text content of - each element with that tag. + None """ - paths = paths or [".//"] - nsmap = nsmap or {} - fields = defaultdict(list) - for path in paths: - elements = tree.findall(path, nsmap) - for element in elements: - tag = re.sub(r"\{.*\}", "", element.tag) if strip_ns else element.tag - fields[tag].append(element.text) - return dict(fields) + match metadata_prefix: + case "oai_dc": + from oaipmh_scythe.models.oai_dc import Dc # noqa: F401 + case "datacite": + from oaipmh_scythe.models.datacite import Resource # noqa: F401 + case _: + pass + + +# class ReturnType(Enum): +# DATACLASS = "dataclass" +# JSON = "json" +# DICT = "dict" + + +# def serialize(data, return_type: ReturnType = ReturnType.DATACLASS): +# # check if data is a valid oai pmh thing + +# match return_type: +# case "json": +# return "json" +# case "dict": +# return "dict" +# case _: +# return data diff --git a/tests/conftest.py b/tests/conftest.py index aacb83b..70b7244 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,6 +4,7 @@ from __future__ import annotations +import httpx import pytest from oaipmh_scythe import Scythe @@ -17,3 +18,20 @@ def vcr_config() -> dict[str, str]: @pytest.fixture() def scythe() -> Scythe: return Scythe("https://zenodo.org/oai2d") + + +@pytest.fixture() +def identify_response() -> httpx.Response: + identify_response_xml = """ + + 2023-11-09T09:53:46Z + https://zenodo.org/oai2d + + Zenodo + https://zenodo.org/oai2d + 2.0 + + + """ + return httpx.Response(status_code=httpx.codes.OK, content=identify_response_xml) diff --git a/tests/integration/test_get_record.py b/tests/integration/test_get_record.py index f5163f0..117f081 100644 --- a/tests/integration/test_get_record.py +++ b/tests/integration/test_get_record.py @@ -7,8 +7,8 @@ from typing import TYPE_CHECKING import pytest -from httpx import HTTPStatusError +from oaipmh_scythe.exceptions import BadArgument, IdDoesNotExist from oaipmh_scythe.models import Record if TYPE_CHECKING: @@ -23,7 +23,7 @@ def test_get_record_with_default_metadata_prefix(scythe: Scythe) -> None: record = scythe.get_record(identifier=IDENTIFIER, metadata_prefix="oai_dc") assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE + assert record.metadata.other_element.title[0].value == TITLE @pytest.mark.default_cassette("get_record.yaml") @@ -31,7 +31,7 @@ def test_get_record_with_default_metadata_prefix(scythe: Scythe) -> None: def test_get_record_without_metadata_prefix(scythe: Scythe) -> None: record = scythe.get_record(identifier=IDENTIFIER) assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE + assert record.metadata.other_element.title[0].value == TITLE @pytest.mark.default_cassette("get_record.yaml") @@ -39,20 +39,18 @@ def test_get_record_without_metadata_prefix(scythe: Scythe) -> None: def test_get_record_with_valid_metadata_prefix(scythe: Scythe) -> None: record = scythe.get_record(identifier=IDENTIFIER, metadata_prefix="datacite") assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE + assert record.metadata.other_element.titles.title[0].value == TITLE @pytest.mark.default_cassette("get_record.yaml") @pytest.mark.vcr() def test_get_record_with_invalid_metadata_prefix(scythe: Scythe) -> None: - with pytest.raises(HTTPStatusError): - # cannotDisseminateFormat + with pytest.raises(BadArgument, match="metadataPrefix does not exist"): scythe.get_record(identifier=IDENTIFIER, metadata_prefix="XXX") @pytest.mark.default_cassette("id_does_not_exist.yaml") @pytest.mark.vcr() def test_get_record_with_invalid_identifier(scythe: Scythe) -> None: - # idDoesNotExist - with pytest.raises(HTTPStatusError): + with pytest.raises(IdDoesNotExist, match="No matching identifier"): scythe.get_record(identifier="oai:zenodo.org:XXX", metadata_prefix="oai_dc") diff --git a/tests/integration/test_identify.py b/tests/integration/test_identify.py index b145b69..7a0184f 100644 --- a/tests/integration/test_identify.py +++ b/tests/integration/test_identify.py @@ -30,14 +30,14 @@ def test_context_manager() -> None: def test_identify(scythe: Scythe) -> None: identify = scythe.identify() assert isinstance(identify, Identify) - assert identify.repositoryName == "Zenodo" + assert identify.repository_name == "Zenodo" @pytest.mark.default_cassette("identify.yaml") @pytest.mark.vcr() def test_non_oai_pmh_url() -> None: scythe = Scythe("https://duckduckgo.com/") - with pytest.raises(ValueError, match="Identify element not found in the XML"): + with pytest.raises(ValueError, match="Unknown property {http://www.openarchives.org/OAI/2.0/}OAI-PMH:head"): scythe.identify() scythe.close() diff --git a/tests/integration/test_list_identifiers.py b/tests/integration/test_list_identifiers.py index 9910ee3..0b70cc0 100644 --- a/tests/integration/test_list_identifiers.py +++ b/tests/integration/test_list_identifiers.py @@ -6,12 +6,11 @@ from collections.abc import Iterator -import httpx import pytest -from lxml import etree -from oaipmh_scythe import OAIResponse, Scythe -from oaipmh_scythe.iterator import OAIResponseIterator +from oaipmh_scythe import Response, Scythe +from oaipmh_scythe.exceptions import BadArgument, BadResumptionToken, NoRecordsMatch +from oaipmh_scythe.iterator import ResponseIterator from oaipmh_scythe.models import Header @@ -48,9 +47,8 @@ def test_list_identifiers_with_valid_metadata_prefix(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_list_identifiers_with_invalid_metadata_prefix(scythe: Scythe) -> None: - # cannotDisseminateFormat headers = scythe.list_identifiers(metadata_prefix="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(BadArgument, match="metadataPrefix does not exist"): next(headers) @@ -91,9 +89,8 @@ def test_list_identifiers_with_valid_set(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_list_identifiers_with_invalid_set(scythe: Scythe) -> None: - # noRecordsMatch headers = scythe.list_identifiers(set_="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(NoRecordsMatch): next(headers) @@ -110,18 +107,16 @@ def test_list_identifiers_with_valid_resumption_token(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_list_identifiers_with_invalid_resumption_token(scythe: Scythe) -> None: - # badResumptionToken headers = scythe.list_identifiers(resumption_token="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(BadResumptionToken, match="The value of the resumptionToken argument is invalid or expired."): next(headers) @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_list_identifiers_raises_no_records_match(scythe: Scythe) -> None: - # noRecordsMatch headers = scythe.list_identifiers(from_="2025-01-15") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(NoRecordsMatch): next(headers) @@ -138,11 +133,9 @@ def test_list_identifiers_ignore_deleted(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_list_identifiers_oai_response(scythe: Scythe) -> None: - scythe.iterator = OAIResponseIterator + scythe.iterator = ResponseIterator responses = scythe.list_identifiers(metadata_prefix="oai_dc") assert isinstance(responses, Iterator) response = next(responses) - assert isinstance(response, OAIResponse) - assert response.params == {"metadataPrefix": "oai_dc", "verb": "ListIdentifiers"} - assert isinstance(response.xml, etree._Element) - assert response.xml.tag == "{http://www.openarchives.org/OAI/2.0/}OAI-PMH" + assert isinstance(response, Response) + # TODO: assert response properly diff --git a/tests/integration/test_list_metadata_formats.py b/tests/integration/test_list_metadata_formats.py index a2cb7d0..d6f62e8 100644 --- a/tests/integration/test_list_metadata_formats.py +++ b/tests/integration/test_list_metadata_formats.py @@ -8,8 +8,8 @@ from typing import TYPE_CHECKING import pytest -from httpx import HTTPStatusError +from oaipmh_scythe.exceptions import IdDoesNotExist from oaipmh_scythe.models import MetadataFormat if TYPE_CHECKING: @@ -23,7 +23,7 @@ def test_list_metadata_formats(scythe: Scythe) -> None: assert isinstance(metadata_formats, Iterator) metadata_format = next(metadata_formats) assert isinstance(metadata_format, MetadataFormat) - assert metadata_format.metadataPrefix == "marcxml" + assert metadata_format.metadata_prefix == "marcxml" @pytest.mark.default_cassette("list_metadata_formats.yaml") @@ -33,13 +33,12 @@ def test_list_metadata_formats_with_valid_identifier(scythe: Scythe) -> None: assert isinstance(metadata_formats, Iterator) metadata_format = next(metadata_formats) assert isinstance(metadata_format, MetadataFormat) - assert metadata_format.metadataPrefix == "marcxml" + assert metadata_format.metadata_prefix == "marcxml" @pytest.mark.default_cassette("list_metadata_formats.yaml") @pytest.mark.vcr() def test_list_metadata_formats_with_invalid_identifier(scythe: Scythe) -> None: - # idDoesNotExist metadata_formats = scythe.list_metadata_formats(identifier="oai:zenodo.org:XXX") - with pytest.raises(HTTPStatusError): + with pytest.raises(IdDoesNotExist, match="No matching identifier"): next(metadata_formats) diff --git a/tests/integration/test_list_records.py b/tests/integration/test_list_records.py index bd4e760..4a5a2ce 100644 --- a/tests/integration/test_list_records.py +++ b/tests/integration/test_list_records.py @@ -7,13 +7,12 @@ from collections.abc import Iterator from typing import TYPE_CHECKING -import httpx import pytest -from lxml import etree -from oaipmh_scythe.iterator import OAIResponseIterator +from oaipmh_scythe.exceptions import BadArgument, BadResumptionToken, NoRecordsMatch +from oaipmh_scythe.iterator import ResponseIterator from oaipmh_scythe.models import Record -from oaipmh_scythe.response import OAIResponse +from oaipmh_scythe.response import Response if TYPE_CHECKING: from oaipmh_scythe import Scythe @@ -29,7 +28,7 @@ def test_list_records_with_default_metadata_prefix(scythe: Scythe) -> None: assert isinstance(records, Iterator) record = next(records) assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE_1 + assert record.metadata.other_element.title[0].value == TITLE_1 @pytest.mark.default_cassette("list_records.yaml") @@ -39,7 +38,7 @@ def test_list_records_without_metadata_prefix(scythe: Scythe) -> None: assert isinstance(records, Iterator) record = next(records) assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE_1 + assert record.metadata.other_element.title[0].value == TITLE_1 @pytest.mark.default_cassette("list_records.yaml") @@ -49,15 +48,14 @@ def test_list_records_with_valid_metadata_prefix(scythe: Scythe) -> None: assert isinstance(records, Iterator) record = next(records) assert isinstance(record, Record) - assert record.metadata["title"][0] == TITLE_1 + assert record.metadata.other_element.titles.title[0].value == TITLE_1 @pytest.mark.default_cassette("list_records.yaml") @pytest.mark.vcr() def test_list_records_with_invalid_metadata_prefix(scythe: Scythe) -> None: - # cannotDisseminateFormat records = scythe.list_records(metadata_prefix="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(BadArgument, match="metadataPrefix does not exist"): next(records) @@ -67,7 +65,7 @@ def test_list_records_with_from(scythe: Scythe) -> None: records = scythe.list_records(from_="2024-01-16") assert isinstance(records, Iterator) record = next(records) - assert record.metadata["title"][0] == TITLE_2 + assert record.metadata.other_element.title[0].value == TITLE_2 @pytest.mark.default_cassette("list_records.yaml") @@ -76,7 +74,7 @@ def test_list_records_with_until(scythe: Scythe) -> None: records = scythe.list_records(until="2024-01-17") assert isinstance(records, Iterator) record = next(records) - assert record.metadata["title"][0] == TITLE_1 + assert record.metadata.other_element.title[0].value == TITLE_1 @pytest.mark.default_cassette("list_records.yaml") @@ -84,7 +82,7 @@ def test_list_records_with_until(scythe: Scythe) -> None: def test_list_records_with_from_and_until(scythe: Scythe) -> None: records = scythe.list_records(from_="2024-01-16", until="2024-01-17") record = next(records) - assert record.metadata["title"][0] == TITLE_2 + assert record.metadata.other_element.title[0].value == TITLE_2 @pytest.mark.default_cassette("list_records.yaml") @@ -92,15 +90,14 @@ def test_list_records_with_from_and_until(scythe: Scythe) -> None: def test_list_records_with_valid_set(scythe: Scythe) -> None: records = scythe.list_records(set_="software") record = next(records) - assert record.metadata["title"][0] == "plasmo-dev/PlasmoExamples: Initial Release" + assert record.metadata.other_element.title[0].value == "plasmo-dev/PlasmoExamples: Initial Release" @pytest.mark.default_cassette("list_records.yaml") @pytest.mark.vcr() def test_list_records_with_invalid_set(scythe: Scythe) -> None: - # noRecordsMatch records = scythe.list_records(set_="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(NoRecordsMatch): next(records) @@ -111,24 +108,22 @@ def test_list_records_with_valid_resumption_token(scythe: Scythe) -> None: records = scythe.list_records(resumption_token=token) assert isinstance(records, Iterator) record = next(records) - assert record + assert isinstance(record, Record) @pytest.mark.default_cassette("list_records.yaml") @pytest.mark.vcr() def test_list_records_with_invalid_resumption_token(scythe: Scythe) -> None: - # badResumptionToken records = scythe.list_records(resumption_token="XXX") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(BadResumptionToken, match="The value of the resumptionToken argument is invalid or expired."): next(records) @pytest.mark.default_cassette("list_records.yaml") @pytest.mark.vcr() def test_list_records_raises_no_records_match(scythe: Scythe) -> None: - # noRecordsMatch records = scythe.list_records(from_="2025-01-15") - with pytest.raises(httpx.HTTPStatusError): + with pytest.raises(NoRecordsMatch): next(records) @@ -145,14 +140,12 @@ def test_list_records_ignore_deleted(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_records.yaml") @pytest.mark.vcr() def test_list_records_oai_response(scythe: Scythe) -> None: - scythe.iterator = OAIResponseIterator - responses = scythe.list_records() - assert isinstance(responses, Iterator) - responses = list(responses) + scythe.iterator = ResponseIterator + _responses = scythe.list_records() + assert isinstance(_responses, Iterator) + responses = list(_responses) # there are 3 canned responses in list_records.yaml assert len(responses) == 3 response = responses[0] - assert isinstance(response, OAIResponse) - assert response.params == {"metadataPrefix": "oai_dc", "verb": "ListRecords"} - assert isinstance(response.xml, etree._Element) - assert response.xml.tag == "{http://www.openarchives.org/OAI/2.0/}OAI-PMH" + assert isinstance(response, Response) + # TODO: assert response properly diff --git a/tests/integration/test_list_sets.py b/tests/integration/test_list_sets.py index f92cece..a50047e 100644 --- a/tests/integration/test_list_sets.py +++ b/tests/integration/test_list_sets.py @@ -8,8 +8,8 @@ from typing import TYPE_CHECKING import pytest -from httpx import HTTPStatusError +from oaipmh_scythe.exceptions import BadResumptionToken from oaipmh_scythe.models import Set if TYPE_CHECKING: @@ -26,7 +26,7 @@ def test_list_sets(scythe: Scythe) -> None: assert len(sets) == 10 s = sets[0] assert isinstance(s, Set) - assert s.setName == "European Middleware Initiative" + assert s.set_name == "European Middleware Initiative" @pytest.mark.default_cassette("list_sets.yaml") @@ -42,7 +42,6 @@ def test_list_sets_with_valid_resumption_token(scythe: Scythe) -> None: @pytest.mark.default_cassette("list_sets.yaml") @pytest.mark.vcr() def test_list_sets_with_invalid_resumption_token(scythe: Scythe) -> None: - # badResumptionToken sets = scythe.list_sets(resumption_token="XXX") - with pytest.raises(HTTPStatusError): + with pytest.raises(BadResumptionToken, match="The value of the resumptionToken argument is invalid or expired."): sets = list(sets) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f2484bb..ed2a9ff 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5,6 +5,7 @@ from __future__ import annotations +from collections.abc import Iterator from contextlib import suppress from typing import TYPE_CHECKING @@ -12,15 +13,28 @@ import pytest from oaipmh_scythe import Scythe +from oaipmh_scythe.models import Identify, Record if TYPE_CHECKING: + from pytest_mock.plugin import MockerFixture, MockType + from respx.models import Route from respx.router import MockRouter -query = {"verb": "ListIdentifiers", "metadataPrefix": "oai_dc"} +query = {"verb": "Identify"} auth = ("username", "password") +@pytest.fixture() +def mock_sleep(mocker: MockerFixture) -> MockType: + return mocker.patch("time.sleep") + + +@pytest.fixture() +def mock_identify(respx_mock: MockRouter, identify_response: httpx.Response) -> Route: + return respx_mock.get("https://zenodo.org/oai2d?verb=Identify").mock(return_value=identify_response) + + def test_invalid_http_method() -> None: with pytest.raises(ValueError, match="Invalid HTTP method"): Scythe("https://localhost", http_method="DELETE") @@ -44,87 +58,116 @@ def test_context_manager() -> None: assert isinstance(scythe, Scythe) -def test_override_encoding(scythe: Scythe, respx_mock: MockRouter) -> None: - mock_route = respx_mock.get("https://zenodo.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc").mock( - return_value=httpx.Response(200) - ) +def test_override_encoding(scythe: Scythe, mock_identify: Route) -> None: custom_encoding = "latin_1" scythe.encoding = custom_encoding - oai_response = scythe.harvest(query) - assert mock_route.called - assert oai_response.http_response.encoding == custom_encoding + http_response = scythe._request(query) + assert mock_identify.called + assert http_response.encoding == custom_encoding -def test_post_method(scythe: Scythe, respx_mock: MockRouter) -> None: - mock_route = respx_mock.post("https://zenodo.org/oai2d").mock(return_value=httpx.Response(200)) +def test_post_method(scythe: Scythe, respx_mock: MockRouter, identify_response: httpx.Response) -> None: + mock_route = respx_mock.post("https://zenodo.org/oai2d").mock(return_value=identify_response) scythe.http_method = "POST" - oai_response = scythe.harvest(query) - assert mock_route.called - assert oai_response.http_response.status_code == 200 + response = scythe.harvest(query) + assert mock_route.call_count == 1 + assert response.status_code == httpx.codes.OK -def test_no_retry(scythe: Scythe, respx_mock: MockRouter) -> None: - mock_route = respx_mock.get("https://zenodo.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc").mock( - return_value=httpx.Response(503) - ) +def test_no_retry(scythe: Scythe, mock_identify: Route) -> None: + mock_identify.return_value = httpx.Response(httpx.codes.SERVICE_UNAVAILABLE) with suppress(httpx.HTTPStatusError): scythe.harvest(query) - assert mock_route.call_count == 1 + assert mock_identify.call_count == 1 -def test_retry_on_503(scythe: Scythe, respx_mock: MockRouter, mocker) -> None: +def test_retry_on_503(scythe: Scythe, mock_identify: Route, mock_sleep: MockType) -> None: scythe.max_retries = 3 scythe.default_retry_after = 0 - mock_sleep = mocker.patch("time.sleep") - mock_route = respx_mock.get("https://zenodo.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc").mock( - return_value=httpx.Response(503, headers={"retry-after": "10"}) - ) + mock_identify.return_value = httpx.Response(httpx.codes.SERVICE_UNAVAILABLE, headers={"retry-after": "10"}) with suppress(httpx.HTTPStatusError): scythe.harvest(query) - assert mock_route.call_count == 4 + assert mock_identify.call_count == 4 assert mock_sleep.call_count == 3 mock_sleep.assert_called_with(10) -def test_retry_on_503_without_retry_after_header(scythe: Scythe, respx_mock: MockRouter, mocker) -> None: +def test_retry_on_503_without_retry_after_header(scythe: Scythe, mock_identify: Route, mock_sleep: MockType) -> None: scythe.max_retries = 3 scythe.default_retry_after = 0 - mock_sleep = mocker.patch("time.sleep") - mock_route = respx_mock.get("https://zenodo.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc").mock( - return_value=httpx.Response(503, headers=None) - ) + mock_identify.return_value = httpx.Response(httpx.codes.SERVICE_UNAVAILABLE, headers=None) with suppress(httpx.HTTPStatusError): scythe.harvest(query) - assert mock_route.call_count == 4 + assert mock_identify.call_count == 4 assert mock_sleep.call_count == 3 -def test_retry_on_custom_code(scythe: Scythe, respx_mock: MockRouter, mocker) -> None: - mock_route = respx_mock.get("https://zenodo.org/oai2d?verb=ListIdentifiers&metadataPrefix=oai_dc").mock( - return_value=httpx.Response(500) - ) +def test_retry_on_custom_code(scythe: Scythe, mock_identify: Route, mock_sleep: MockType) -> None: + mock_identify.return_value = httpx.Response(httpx.codes.INTERNAL_SERVER_ERROR) scythe.max_retries = 3 scythe.default_retry_after = 0 - mock_sleep = mocker.patch("time.sleep") - scythe.retry_status_codes = (503, 500) + scythe.retry_status_codes = (httpx.codes.SERVICE_UNAVAILABLE, httpx.codes.INTERNAL_SERVER_ERROR) with suppress(httpx.HTTPStatusError): scythe.harvest(query) - assert mock_route.call_count == 4 + assert mock_identify.call_count == 4 assert mock_sleep.call_count == 3 -def test_no_auth_arguments(): +def test_no_auth_arguments() -> None: with Scythe("https://zenodo.org/oai2d") as scythe: assert scythe.client.auth is None -def test_auth_arguments(): +def test_auth_arguments() -> None: with Scythe("https://zenodo.org/oai2d", auth=auth) as scythe: assert scythe.client.auth -def test_auth_arguments_usage(respx_mock: MockRouter) -> None: +def test_auth_arguments_usage(respx_mock: MockRouter, mock_identify: Route) -> None: scythe = Scythe("https://zenodo.org/oai2d", auth=auth) - respx_mock.get("https://zenodo.org/oai2d").mock(return_value=httpx.Response(200)) - oai_response = scythe.harvest(query) - assert oai_response.http_response.request.headers["authorization"] + http_response = scythe._request(query) + assert http_response.request.headers["authorization"] + + +def test_identify(scythe: Scythe, mock_identify: Route) -> None: + identify = scythe.identify() + assert isinstance(identify, Identify) + + +@pytest.mark.default_cassette("list_records.yaml") +@pytest.mark.vcr() +def test_list_records(scythe: Scythe) -> None: + records = scythe.list_records() + assert isinstance(records, Iterator) + assert next(records) + + +@pytest.mark.default_cassette("list_identifiers.yaml") +@pytest.mark.vcr() +def test_list_identifiers(scythe: Scythe) -> None: + headers = scythe.list_identifiers() + assert isinstance(headers, Iterator) + assert next(headers) + + +@pytest.mark.default_cassette("list_metadata_formats.yaml") +@pytest.mark.vcr() +def test_list_metadata_formats(scythe: Scythe, mocker) -> None: + metadata_formats = scythe.list_metadata_formats() + assert isinstance(metadata_formats, Iterator) + assert next(metadata_formats) + + +@pytest.mark.default_cassette("list_sets.yaml") +@pytest.mark.vcr() +def test_list_sets(scythe: Scythe, mocker) -> None: + sets = scythe.list_sets() + assert isinstance(sets, Iterator) + assert next(sets) + + +@pytest.mark.default_cassette("get_record.yaml") +@pytest.mark.vcr() +def test_get_record(scythe: Scythe) -> None: + record = scythe.get_record(identifier="oai:zenodo.org:10357859") + assert isinstance(record, Record) diff --git a/tests/unit/test_iterator.py b/tests/unit/test_iterator.py index de4374e..f8731f1 100644 --- a/tests/unit/test_iterator.py +++ b/tests/unit/test_iterator.py @@ -6,8 +6,8 @@ import pytest -from oaipmh_scythe import OAIResponse, Scythe -from oaipmh_scythe.iterator import OAIItemIterator, OAIResponseIterator +from oaipmh_scythe import Response, Scythe +from oaipmh_scythe.iterator import OAIItemIterator, ResponseIterator from oaipmh_scythe.models import Header query = {"verb": "ListIdentifiers", "metadataPrefix": "oai_dc"} @@ -16,16 +16,16 @@ @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_iterator_str(scythe: Scythe) -> None: - iterator = OAIResponseIterator(scythe, query) - assert str(iterator) == "" + iterator = ResponseIterator(scythe, query) + assert str(iterator) == "" @pytest.mark.default_cassette("list_identifiers.yaml") @pytest.mark.vcr() def test_oai_response_iterator(scythe: Scythe) -> None: - iterator = OAIResponseIterator(scythe, query) + iterator = ResponseIterator(scythe, query) responses = list(iterator) - assert isinstance(responses[0], OAIResponse) + assert isinstance(responses[0], Response) # there are 3 canned responses in list_identifiers.yaml assert len(responses) == 3 diff --git a/tests/unit/test_models.py b/tests/unit/test_models.py index 85419dd..a0dab54 100644 --- a/tests/unit/test_models.py +++ b/tests/unit/test_models.py @@ -2,266 +2,135 @@ # # SPDX-License-Identifier: BSD-3-Clause -import pytest -from lxml import etree +from xsdata.formats.dataclass.context import XmlContext +from xsdata.formats.dataclass.parsers import XmlParser +from xsdata.models.datatype import XmlDateTime -from oaipmh_scythe import OAIResponse from oaipmh_scythe.models import Header, Identify, MetadataFormat, Record, ResumptionToken, Set +from oaipmh_scythe.models.oai_dc import Dc, Title +from oaipmh_scythe.models.oai_pmh import Metadata, OaiPmherror, OaiPmherrorcode, ProtocolVersion, Status +PARSER = XmlParser(context=XmlContext()) -def test_resumption_token_repr() -> None: - token = ResumptionToken(token="some-token") - assert repr(token) == "" - -@pytest.fixture() -def identify_response(mocker): - xml = """ - - 2023-11-09T09:53:46Z - https://zenodo.org/oai2d - - Zenodo - https://zenodo.org/oai2d - 2.0 - - +def test_identify_parsing() -> None: + identify_xml = """ + + Zenodo + https://zenodo.org/oai2d + 2.0 + """ - mock_response = mocker.MagicMock(spec=OAIResponse) - mock_response.xml = etree.fromstring(xml) - return mock_response - - -@pytest.fixture() -def identify(identify_response) -> Identify: - return Identify(identify_response) - - -def test_identify_bytes(identify): - assert isinstance(identify.__bytes__(), bytes) - assert b"https://zenodo.org/oai2d" in identify.__bytes__() - - -def test_identify_str(identify): - assert isinstance(identify.__str__(), str) - assert "https://zenodo.org/oai2d" in str(identify) - - -def test_identify_raw(identify): - assert isinstance(identify.raw, str) - assert "https://zenodo.org/oai2d" in identify.raw - - -def test_identify_repr(identify): - assert repr(identify) == "" - - -def test_identify_attributes(identify): - assert identify.repositoryName == "Zenodo" - assert identify.baseURL == "https://zenodo.org/oai2d" - assert identify.protocolVersion == "2.0" - - -def test_identify_iter(identify): - identify_items = dict(identify) - assert identify_items["repositoryName"] == ["Zenodo"] - assert identify_items["baseURL"] == ["https://zenodo.org/oai2d"] - assert identify_items["protocolVersion"] == ["2.0"] + identify = PARSER.from_string(identify_xml, Identify) + assert isinstance(identify, Identify) + expected = Identify( + repository_name="Zenodo", base_url="https://zenodo.org/oai2d", protocol_version=ProtocolVersion.VALUE_2_0 + ) + assert identify == expected -@pytest.fixture(scope="session") -def header_element(): - xml = """ +def test_header_parsing(): + header_xml = """
- oai:zenodo.org:6538892 - 2022-05-11T13:49:36Z + oai:zenodo.org:10357859 + 2023-12-11T17:26:46Z
""" - return etree.fromstring(xml.encode()) - - -@pytest.fixture(scope="session") -def deleted_header_element(): - xml = """ -
- oai:zenodo.org:6538892 - 2022-05-11T13:49:36Z -
- """ - return etree.fromstring(xml.encode()) - - -@pytest.fixture() -def header(header_element): - return Header(header_element) - - -@pytest.fixture() -def deleted_header(deleted_header_element): - return Header(deleted_header_element) - - -def test_header_init(header): - assert header.identifier == "oai:zenodo.org:6538892" - assert header.datestamp == "2022-05-11T13:49:36Z" + header = PARSER.from_string(header_xml, Header) + assert isinstance(header, Header) + expected = Header(identifier="oai:zenodo.org:10357859", datestamp="2023-12-11T17:26:46Z") + assert header == expected assert not header.deleted -def test_header_init_with_deleted(deleted_header): - assert deleted_header.identifier == "oai:zenodo.org:6538892" - assert deleted_header.datestamp == "2022-05-11T13:49:36Z" - assert deleted_header.deleted - - -def test_header_repr(header, deleted_header): - assert repr(header) == "
" - assert repr(deleted_header) == "
" +def test_header_deleted(): + header_xml = '
' + header = PARSER.from_string(header_xml, Header) + assert header.deleted -def test_header_iter(header): - items = dict(header) - assert items == {"identifier": "oai:zenodo.org:6538892", "datestamp": "2022-05-11T13:49:36Z", "setSpecs": []} - - -@pytest.fixture() -def record_element(): - xml = """ - -
- oai:example.org:record1 - 2021-01-01 - set1 -
- - - Example Title - Example Creator - - -
+def test_resumption_token_parsing() -> None: + token_xml = """ + eJyNzt1ugjAYgOF7 """ - return etree.fromstring(xml.encode()) + token = PARSER.from_string(token_xml, ResumptionToken) + assert isinstance(token, ResumptionToken) + expiration_date = XmlDateTime(2024, 1, 21, 16, 55, 57) + expected = ResumptionToken( + value="eJyNzt1ugjAYgOF7", cursor=0, expiration_date=expiration_date, complete_list_size=3677115 + ) + assert token == expected -@pytest.fixture() -def deleted_record_lement(): - xml = """ +def test_record_parsing(): + record_xml = """ -
- oai:example.org:record1 - 2021-01-01 - set1 -
+
- - Example Title - Example Creator - + + Research Data Management Organiser (RDMO) +
""" - return etree.fromstring(xml.encode()) - - -@pytest.fixture() -def record(record_element): - return Record(record_element) - - -@pytest.fixture() -def deleted_record(deleted_record_lement): - return Record(deleted_record_lement) - - -def test_record_init(record): - assert isinstance(record.header, Header) - assert record.header.identifier == "oai:example.org:record1" + record = PARSER.from_string(record_xml, Record) + assert isinstance(record, Record) + expected = Record( + header=Header(), + metadata=Metadata(other_element=Dc(title=[Title(value="Research Data Management Organiser (RDMO)")])), + ) + assert record == expected assert not record.deleted - assert "title" in record.metadata - assert record.metadata["title"] == ["Example Title"] - -def test_record_repr(record): - assert repr(record) == "" +def test_record_deleted(): + record = Record(header=Header(status=Status.DELETED)) + assert record.deleted -def test_deleted_record_repr(deleted_record): - assert repr(deleted_record) == "" +def test_record_get_metadata(): + expected = Dc(title=[Title(value="Research Data Management Organiser (RDMO)")]) + record = Record(header=Header(), metadata=Metadata(other_element=expected)) + metadata = record.get_metadata() + assert isinstance(metadata, Dc) + assert metadata == expected -def test_record_iter(record): - record_metadata = dict(record) - assert record_metadata["title"] == ["Example Title"] - assert record_metadata["creator"] == ["Example Creator"] +def test_error_parsing(): + error_xml = 'No matching identifier' + error = PARSER.from_string(error_xml, OaiPmherror) + assert isinstance(error, OaiPmherror) + expected = OaiPmherror(code=OaiPmherrorcode.ID_DOES_NOT_EXIST, value="No matching identifier") + assert error == expected -def test_deleted_record_no_metadata(deleted_record): - assert deleted_record.deleted - with pytest.raises(AttributeError): - _ = record.metadata - -@pytest.fixture() -def set_element(): - xml = """ - - user-emi - European Middleware Initiative - +def test_set_parsing(): + set_xml = """ + + software + Software """ - return etree.fromstring(xml.encode()) - - -@pytest.fixture() -def oai_set(set_element): - return Set(set_element) - - -def test_set_init(oai_set): - assert oai_set.setName == "European Middleware Initiative" - assert "ser-emi" in oai_set.setSpec # spellchecker:disable-line - - -def test_set_repr(oai_set): - assert repr(oai_set) == "" - - -def test_set_iter(oai_set): - set_items = dict(oai_set) - assert set_items["setName"] == ["European Middleware Initiative"] - assert set_items["setSpec"] == ["user-emi"] - - -@pytest.fixture() -def mdf_element(): - xml = """ - - marcxml - https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd - https://www.loc.gov/standards/marcxml/ + set_ = PARSER.from_string(set_xml, Set) + expected = Set(set_spec="software", set_name="Software") + assert isinstance(set_, Set) + assert set_ == expected + + +def test_metadata_format_parsing(): + metadata_format_xml = """ + + oai_dc + http://www.openarchives.org/OAI/2.0/oai_dc.xsd + http://www.openarchives.org/OAI/2.0/oai_dc/ """ - return etree.fromstring(xml.encode()) - - -@pytest.fixture() -def mdf(mdf_element): - return MetadataFormat(mdf_element) - - -def test_metadata_format_init(mdf): - assert mdf.metadataPrefix == "marcxml" - assert mdf.schema == "https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" - assert mdf.metadataNamespace == "https://www.loc.gov/standards/marcxml/" - - -def test_metadata_format_repr(mdf): - assert repr(mdf) == "" - - -def test_metadata_format_iter(mdf): - mdf_items = dict(mdf) - assert mdf_items["metadataPrefix"] == ["marcxml"] - assert mdf_items["schema"] == ["https://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"] - assert mdf_items["metadataNamespace"] == ["https://www.loc.gov/standards/marcxml/"] + metadata_format = PARSER.from_string(metadata_format_xml, MetadataFormat) + assert isinstance(metadata_format, MetadataFormat) + expected = MetadataFormat( + metadata_prefix="oai_dc", + schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd", + metadata_namespace="http://www.openarchives.org/OAI/2.0/oai_dc/", + ) + assert metadata_format == expected diff --git a/tests/unit/test_response.py b/tests/unit/test_response.py index a7a5690..0fc32f0 100644 --- a/tests/unit/test_response.py +++ b/tests/unit/test_response.py @@ -7,50 +7,30 @@ from typing import TYPE_CHECKING import pytest -from lxml import etree -from oaipmh_scythe.response import OAIResponse +from oaipmh_scythe.exceptions import IdDoesNotExist +from oaipmh_scythe.models.oai_pmh import Identify, OaiPmh, OaiPmherror, OaiPmherrorcode +from oaipmh_scythe.response import Response, _build_response, raise_for_error if TYPE_CHECKING: - from pytest_mock import MockerFixture + import httpx -IDENTIFY_XML: str = """ - - 2023-11-09T09:53:46Z - https://zenodo.org/oai2d - - Zenodo - https://zenodo.org/oai2d - 2.0 - - -""" +def test_build_response(identify_response: httpx.Response) -> None: + response = _build_response(identify_response, metadata_prefix="oai_dc") + assert isinstance(response, Response) + assert isinstance(response.parsed, OaiPmh) + assert response.status_code == identify_response.status_code + assert response.content == identify_response.content + assert isinstance(response.parsed.identify, Identify) + assert response.parsed.identify.repository_name == "Zenodo" -@pytest.fixture() -def mock_response(mocker: MockerFixture): - response = mocker.Mock() - response.text = IDENTIFY_XML - response.content = response.text.encode() - return response +def test_raise_for_error_no_errors() -> None: + assert raise_for_error(None) is None -def test_oai_response_raw(mock_response) -> None: - params = {"verb": "Identify"} - oai_response = OAIResponse(http_response=mock_response, params=params) - assert oai_response.raw == mock_response.text - - -def test_oai_response_xml(mock_response): - params = {"verb": "Identify"} - oai_response = OAIResponse(http_response=mock_response, params=params) - assert isinstance(oai_response.xml, etree._Element) - assert oai_response.xml.tag == "{http://www.openarchives.org/OAI/2.0/}OAI-PMH" - - -def test_oai_response_str(mock_response): - params = {"verb": "Identify"} - oai_response = OAIResponse(http_response=mock_response, params=params) - assert str(oai_response) == "" +def test_raise_for_error() -> None: + error = OaiPmherror(code=OaiPmherrorcode.ID_DOES_NOT_EXIST, value="No matching identifier") + with pytest.raises(IdDoesNotExist): + raise_for_error([error]) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index b362424..24b1c8d 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -3,22 +3,14 @@ # # SPDX-License-Identifier: BSD-3-Clause -import pytest -from lxml import etree - -from oaipmh_scythe.utils import filter_dict_except_resumption_token, get_namespace, remove_none_values, xml_to_dict - +from typing import TYPE_CHECKING -@pytest.fixture() -def xml_element_with_namespace() -> etree._Element: - xml = 'https://zenodo.org/oai2d' - return etree.fromstring(xml) +import pytest +if TYPE_CHECKING: + from pytest_mock.plugin import MockerFixture -@pytest.fixture() -def xml_element_without_namespace() -> etree._Element: - xml = 'https://zenodo.org/oai2d' - return etree.fromstring(xml) +from oaipmh_scythe.utils import filter_dict_except_resumption_token, load_models, remove_none_values def test_remove_none_values() -> None: @@ -46,38 +38,8 @@ def test_filter_dict_except_resumption_token_noop() -> None: assert result == d -def test_get_namespace(xml_element_with_namespace: etree._Element) -> None: - namespace = get_namespace(xml_element_with_namespace) - assert namespace == "{http://www.openarchives.org/OAI/2.0/}" - - -def test_get_namespace_without_namespace(xml_element_without_namespace: etree._Element) -> None: - namespace = get_namespace(xml_element_without_namespace) - assert namespace is None - - -def test_xml_to_dict_default(xml_element_with_namespace: etree._Element) -> None: - result = xml_to_dict(xml_element_with_namespace) - expected = {"{http://www.openarchives.org/OAI/2.0/}request": ["https://zenodo.org/oai2d"]} - assert result == expected - - -def test_xml_to_dict_with_paths(xml_element_with_namespace: etree._Element) -> None: - result = xml_to_dict(xml_element_with_namespace, paths=["./{http://www.openarchives.org/OAI/2.0/}request"]) - expected = { - "{http://www.openarchives.org/OAI/2.0/}request": ["https://zenodo.org/oai2d"], - } - assert result == expected - - -def test_xml_to_dict_with_nsmap(xml_element_with_namespace: etree._Element) -> None: - nsmap = {"oai": "http://www.openarchives.org/OAI/2.0/"} - result = xml_to_dict(xml_element_with_namespace, paths=["oai:request"], nsmap=nsmap) - expected = {"{http://www.openarchives.org/OAI/2.0/}request": ["https://zenodo.org/oai2d"]} - assert result == expected - - -def test_xml_to_dict_strip_namespace(xml_element_with_namespace: etree._Element) -> None: - result = xml_to_dict(xml_element_with_namespace, strip_ns=True) - expected = {"request": ["https://zenodo.org/oai2d"]} - assert result == expected +@pytest.mark.parametrize("metadata_prefix", ["oai_dc", "datacite"]) +def test_load_models(mocker: "MockerFixture", metadata_prefix: str) -> None: + mock_import = mocker.patch("builtins.__import__") + load_models(metadata_prefix) + mock_import.assert_called_once()