diff --git a/README.md b/README.md index 92b9af6..2749048 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ This is a community maintained fork of the original [sickle](https://github.com/ | --- | --- | | CI | [![ci][ci-badge]][ci-workflow] [![coverage][coverage-badge]][ci-workflow] | | Docs | [![docs][docs-badge]][docs-workflow] | -| Meta | [![OpenSSF Scorecard][scorecard-badge]][scorecard-url] [![hatch][hatch-badge]][hatch] [![pre-commit enabled][pre-commit-badge]][pre-commit] [![ruff][ruff-badge]][ruff] [![License][license-badge]][license] | +| Meta | [![OpenSSF Scorecard][scorecard-badge]][scorecard-url] [![hatch][hatch-badge]][hatch] [![pre-commit enabled][pre-commit-badge]][pre-commit] [![ruff][ruff-badge]][ruff] [![mypy][mypy-badge]][mypy] [![License][license-badge]][license] | oaipmh-scythe is a lightweight [OAI-PMH](http://www.openarchives.org/OAI/openarchivesprotocol.html) client library written in Python. It has been designed for conveniently retrieving data from OAI interfaces the Pythonic way: @@ -68,5 +68,7 @@ oaipmh-scythe is distributed under the terms of the [BSD](https://spdx.org/licen [pre-commit-badge]: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white [ruff]: https://github.com/charliermarsh/ruff [ruff-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json +[mypy]: https://mypy-lang.org +[mypy-badge]: https://img.shields.io/badge/types-mypy-blue.svg [test-pypi]: https://test.pypi.org/ [pip]: https://pip.pypa.io/ diff --git a/pyproject.toml b/pyproject.toml index 6378231..37fcfd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,12 +183,6 @@ show_column_numbers = true show_error_codes = true show_error_context = true -[[tool.mypy.overrides]] -module = [ - "oaipmh_scythe.*", -] -ignore_errors = true - # licensecheck # Ref: https://github.com/FHPythonUtils/LicenseCheck/#example-1-pyprojecttoml # ------------------------------------------------------------------------------ diff --git a/src/oaipmh_scythe/iterator.py b/src/oaipmh_scythe/iterator.py index 4aeeece..4407a66 100644 --- a/src/oaipmh_scythe/iterator.py +++ b/src/oaipmh_scythe/iterator.py @@ -63,19 +63,19 @@ def _get_resumption_token(self) -> ResumptionToken | None: ): return ResumptionToken( token=token_element.text, - cursor=token_element.attrib.get("cursor"), - complete_list_size=token_element.attrib.get("completeListSize"), - expiration_date=token_element.attrib.get("expirationDate"), + cursor=token_element.attrib.get("cursor"), # type: ignore [arg-type] + complete_list_size=token_element.attrib.get("completeListSize"), # type: ignore [arg-type] + expiration_date=token_element.attrib.get("expirationDate"), # type: ignore [arg-type] ) return None def _next_response(self) -> None: - if self.resumption_token: + if self.resumption_token and self.resumption_token.token: self.params = {"resumptionToken": self.resumption_token.token, "verb": self.verb} self.oai_response = self.scythe.harvest(**self.params) if (error := self.oai_response.xml.find(f".//{self.scythe.oai_namespace}error")) is not None: - code = error.attrib.get("code", "UNKNOWN") + code = str(error.attrib.get("code", "UNKNOWN")) description = error.text or "" try: exception_name = code[0].upper() + code[1:] diff --git a/src/oaipmh_scythe/models.py b/src/oaipmh_scythe/models.py index 6a61033..5191485 100644 --- a/src/oaipmh_scythe/models.py +++ b/src/oaipmh_scythe/models.py @@ -67,7 +67,10 @@ class Identify(OAIItem): def __init__(self, identify_response: OAIResponse) -> None: super().__init__(identify_response.xml, strip_ns=True) - self.xml = self.xml.find(".//" + self._oai_namespace + "Identify") + identify_element = self.xml.find(f".//{self._oai_namespace}Identify") + if identify_element is None: + raise ValueError("Identify element not found in the XML.") + self.xml = identify_element self._identify_dict = xml_to_dict(self.xml, strip_ns=True) for k, v in self._identify_dict.items(): setattr(self, k.replace("-", "_"), v[0]) @@ -88,17 +91,15 @@ class Header(OAIItem): def __init__(self, header_element: etree._Element) -> None: super().__init__(header_element, strip_ns=True) self.deleted = self.xml.attrib.get("status") == "deleted" - _identifier_element = self.xml.find(self._oai_namespace + "identifier") - _datestamp_element = self.xml.find(self._oai_namespace + "datestamp") + _identifier_element = self.xml.find(f"{self._oai_namespace}identifier") + _datestamp_element = self.xml.find(f"{self._oai_namespace}datestamp") self.identifier = getattr(_identifier_element, "text", None) self.datestamp = getattr(_datestamp_element, "text", None) - self.setSpecs = [setSpec.text for setSpec in self.xml.findall(self._oai_namespace + "setSpec")] + self.setSpecs = [setSpec.text for setSpec in self.xml.findall(f"{self._oai_namespace}setSpec")] def __repr__(self) -> str: - if self.deleted: - return f"
" - return f"
" + return f"
" def __iter__(self) -> Iterator: return iter( @@ -120,15 +121,16 @@ class Record(OAIItem): def __init__(self, record_element: etree._Element, strip_ns: bool = True) -> None: super().__init__(record_element, strip_ns=strip_ns) - self.header = Header(self.xml.find(".//" + self._oai_namespace + "header")) + header_element = self.xml.find(f".//{self._oai_namespace}header") + if header_element is None: + raise ValueError("Header element not found in the XML.") + self.header = Header(header_element) self.deleted = self.header.deleted if not self.deleted: self.metadata = self.get_metadata() def __repr__(self) -> str: - if self.header.deleted: - return f"" - return f"" + return f"" def __iter__(self) -> Iterator: return iter(self.metadata.items()) @@ -153,6 +155,7 @@ class Set(OAIItem): def __init__(self, set_element: etree._Element) -> None: super().__init__(set_element, strip_ns=True) self._set_dict = xml_to_dict(self.xml, strip_ns=True) + self.setName: str | None = None for k, v in self._set_dict.items(): setattr(self, k.replace("-", "_"), v[0]) @@ -167,13 +170,12 @@ class MetadataFormat(OAIItem): """Represents an OAI MetadataFormat. :param mdf_element: The XML element 'metadataFormat'. - :type mdf_element: :class:`lxml.etree._Element` """ def __init__(self, mdf_element: etree._Element) -> None: super().__init__(mdf_element, strip_ns=True) - #: The prefix of this format. self._mdf_dict = xml_to_dict(self.xml, strip_ns=True) + self.metadataPrefix: str | None = None for k, v in self._mdf_dict.items(): setattr(self, k.replace("-", "_"), v[0]) diff --git a/src/oaipmh_scythe/scythe.py b/src/oaipmh_scythe/scythe.py index 2acd783..cecfaa4 100644 --- a/src/oaipmh_scythe/scythe.py +++ b/src/oaipmh_scythe/scythe.py @@ -107,7 +107,7 @@ def __init__( self.class_mapping = class_mapping or DEFAULT_CLASS_MAP self.encoding = encoding self.timeout = timeout - self.request_args = request_args + self.request_args: dict[str, str] = request_args def harvest(self, **kwargs: str) -> OAIResponse: """Make HTTP requests to the OAI server. @@ -126,12 +126,12 @@ def harvest(self, **kwargs: str) -> OAIResponse: http_response.encoding = self.encoding return OAIResponse(http_response, params=kwargs) - def _request(self, kwargs: str) -> Response: + def _request(self, kwargs: dict[str, str]) -> Response: headers = {"user-agent": USER_AGENT} with httpx.Client(headers=headers, timeout=self.timeout) as client: if self.http_method == "GET": - return client.get(self.endpoint, params=kwargs, **self.request_args) - return client.post(self.endpoint, data=kwargs, **self.request_args) + return client.get(self.endpoint, params=kwargs, **self.request_args) # type: ignore [arg-type] + return client.post(self.endpoint, data=kwargs, **self.request_args) # type: ignore [arg-type] def list_records(self, ignore_deleted: bool = False, **kwargs: str) -> Iterator[OAIResponse | OAIItem]: """Issue a ListRecords request.