Skip to content

Commit

Permalink
feat!: add xsdata models
Browse files Browse the repository at this point in the history
  • Loading branch information
afuetterer committed Apr 20, 2024
1 parent 6e9365b commit 4f7a02a
Show file tree
Hide file tree
Showing 27 changed files with 3,233 additions and 881 deletions.
12 changes: 8 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,11 @@ repos:
- id: mypy
args: [--config-file=pyproject.toml]
additional_dependencies:
- httpx==0.26.0
- lxml-stubs==0.5.1
exclude: tests
- httpx>=0.27
- lxml-stubs>=0.5
- pytest>=8.1
- xsdata>=24.4
exclude: tests # TODO: remove this exclusion

- repo: https://github.com/scientific-python/cookie
rev: d43e497727162f7edcfade1022237565c88d5347 # frozen: 2024.03.10
Expand All @@ -86,7 +88,9 @@ repos:
hooks:
- id: typos
args: [--force-exclude]
exclude: CHANGELOG.md # the commit hashes in changelog trigger the spell checker
# CHANGELOG.md: the commit hashes in the changelog trigger the spell checker
# src/oaipmh_scythe/models: Python modules autogenerated by xsdata
exclude: ^CHANGELOG.md|^src/oaipmh_scythe/models/.*

- repo: https://github.com/FHPythonUtils/LicenseCheck/
rev: b2b50f4d40c95b15478279a7a00553a1dc2925ef # frozen: 2024.2
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ with Scythe("https://zenodo.org/oai2d") as scythe:
`oaipmh-scythe` is built with:

- [httpx](https://github.com/encode/httpx) for issuing HTTP requests
- [lxml](https://github.com/lxml/lxml) for parsing XML responses
- [xsdata](https://github.com/tefra/xsdata) for parsing XML responses

## Installation

Expand Down
10 changes: 9 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dynamic = [
]
dependencies = [
"httpx>=0.25",
"lxml>=5.1",
"xsdata[cli,lxml]", # TODO: remove cli extra
]
[project.optional-dependencies]
dev = [
Expand Down Expand Up @@ -169,6 +169,13 @@ pydocstyle.convention = "google"
"src/oaipmh_scythe/client.py" = [
"PLR0913", # too-many-arguments
]
"src/oaipmh_scythe/models/*" = [
"D101", # undocumented-public-class
"D106", # undocumented-public-nested-class
"D205",
"D415",
"RUF002",
]
"tests/*" = [
"D100", # undocumented-public-module
"D103", # undocumented-public-function
Expand Down Expand Up @@ -212,6 +219,7 @@ parallel = true
source = ["oaipmh_scythe"]
omit = [
"__about__.py",
"src/oaipmh_scythe/models/datacite.py",
]

[tool.coverage.report]
Expand Down
4 changes: 2 additions & 2 deletions src/oaipmh_scythe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
"""oaipmh-scythe: A Scythe for harvesting OAI-PMH repositories."""

from oaipmh_scythe.client import Scythe
from oaipmh_scythe.response import OAIResponse
from oaipmh_scythe.response import Response

__all__ = [
"Scythe",
"OAIResponse",
"Response",
]
58 changes: 20 additions & 38 deletions src/oaipmh_scythe/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

from oaipmh_scythe.__about__ import __version__
from oaipmh_scythe.iterator import BaseOAIIterator, OAIItemIterator
from oaipmh_scythe.models import Header, Identify, MetadataFormat, OAIItem, Record, Set
from oaipmh_scythe.response import OAIResponse
from oaipmh_scythe.models import Header, Identify, MetadataFormat, Record, Set, Verb
from oaipmh_scythe.response import Response, _build_response
from oaipmh_scythe.utils import filter_dict_except_resumption_token, log_response, remove_none_values

if TYPE_CHECKING:
Expand All @@ -34,18 +34,6 @@
logger = logging.getLogger(__name__)

USER_AGENT: str = f"oaipmh-scythe/{__version__}"
OAI_NAMESPACE: str = "{http://www.openarchives.org/OAI/2.0/}"


# Map OAI verbs to class representations
DEFAULT_CLASS_MAP = {
"GetRecord": Record,
"ListRecords": Record,
"ListIdentifiers": Header,
"ListSets": Set,
"ListMetadataFormats": MetadataFormat,
"Identify": Identify,
}


class Scythe:
Expand Down Expand Up @@ -82,7 +70,6 @@ def __init__(
max_retries: int = 0,
retry_status_codes: Iterable[int] | None = None,
default_retry_after: int = 60,
class_mapping: dict[str, type[OAIItem]] | None = None,
encoding: str = "utf-8",
auth: AuthTypes | None = None,
timeout: int = 60,
Expand All @@ -98,8 +85,6 @@ def __init__(
self.max_retries = max_retries
self.retry_status_codes = retry_status_codes or (503,)
self.default_retry_after = default_retry_after
self.oai_namespace = OAI_NAMESPACE
self.class_mapping = class_mapping or DEFAULT_CLASS_MAP
self.encoding = encoding
self.auth = auth
self.timeout = timeout
Expand Down Expand Up @@ -149,7 +134,7 @@ def __exit__(
) -> None:
self.close()

def harvest(self, query: dict[str, str]) -> OAIResponse:
def harvest(self, query: dict[str, str]) -> Response:
"""Perform an HTTP request to the OAI server with the given parameters.
Send an OAI-PMH request to the server using the specified parameters. Handle retry logic
Expand All @@ -171,8 +156,8 @@ def harvest(self, query: dict[str, str]) -> OAIResponse:
logger.warning("HTTP %d! Retrying after %d seconds...", http_response.status_code, retry_after)
time.sleep(retry_after)
http_response = self._request(query)
http_response.raise_for_status()
return OAIResponse(http_response, params=query)
metadata_prefix = query.get("metadataPrefix")
return _build_response(http_response, metadata_prefix)

def _request(self, query: dict[str, str]) -> httpx.Response:
"""Send an HTTP request to the OAI server using the configured HTTP method and given query parameters.
Expand All @@ -195,7 +180,7 @@ def list_records(
set_: str | None = None,
resumption_token: str | None = None,
ignore_deleted: bool = False,
) -> Iterator[OAIResponse | Record]:
) -> Iterator[Response | Record]:
"""Issue a ListRecords request to the OAI server.
Send a request to list records from the OAI server, allowing for selective harvesting based on date range,
Expand Down Expand Up @@ -224,7 +209,7 @@ def list_records(
"""
_query = {
"verb": "ListRecords",
"verb": Verb.LIST_RECORDS.value,
"from": from_,
"until": until,
"metadataPrefix": metadata_prefix,
Expand All @@ -242,7 +227,7 @@ def list_identifiers(
set_: str | None = None,
resumption_token: str | None = None,
ignore_deleted: bool = False,
) -> Iterator[OAIResponse | Header]:
) -> Iterator[Response | Header]:
"""Issue a ListIdentifiers request to the OAI server.
Send a request to list record identifiers from the OAI server. This method allows filtering records based on
Expand All @@ -268,10 +253,9 @@ def list_identifiers(
cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server.
noRecordsMatch: If no records match the provided criteria.
noSetHierarchy: If set-based harvesting is requested but the OAI server does not support sets.
"""
_query = {
"verb": "ListIdentifiers",
"verb": Verb.LIST_IDENTIFIERS.value,
"from": from_,
"until": until,
"metadataPrefix": metadata_prefix,
Expand All @@ -282,7 +266,7 @@ def list_identifiers(
query = remove_none_values(filter_dict_except_resumption_token(_query))
yield from self.iterator(self, query, ignore_deleted=ignore_deleted)

def list_sets(self, resumption_token: str | None = None) -> Iterator[OAIResponse | Set]:
def list_sets(self, resumption_token: str | None = None) -> Iterator[Response | Set]:
"""Issue a ListSets request to the OAI server.
Send a request to list all sets defined in the OAI server. Sets are used to categorize records in the OAI
Expand All @@ -300,10 +284,9 @@ def list_sets(self, resumption_token: str | None = None) -> Iterator[OAIResponse
Raises:
badResumptionToken: If the provided resumption token is invalid or expired.
noSetHierarchy: If the OAI server does not support sets or has no set hierarchy available.
"""
_query = {
"verb": "ListSets",
"verb": Verb.LIST_SETS.value,
"resumptionToken": resumption_token,
}
query = remove_none_values(filter_dict_except_resumption_token(_query))
Expand All @@ -321,12 +304,12 @@ def identify(self) -> Identify:
Returns:
Identify: An object encapsulating the server's identify response, which contains various pieces of information
about the OAI server.
"""
query = {"verb": "Identify"}
return Identify(self.harvest(query))
query = {"verb": Verb.IDENTIFY.value}
response = self.harvest(query)
return response.parsed.identify

def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> OAIResponse | Record:
def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> Response | Record:
"""Issue a GetRecord request to the OAI server.
Send a request to the OAI server to retrieve a specific record. The request is constructed with the provided
Expand All @@ -347,16 +330,16 @@ def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> OAIRes
cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server
for the requested record.
idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server.
"""
query = {
"verb": "GetRecord",
"verb": Verb.GET_RECORD.value,
"identifier": identifier,
"metadataPrefix": metadata_prefix,
}
return next(iter(self.iterator(self, query)))
response = self.harvest(query)
return response.parsed.get_record.record

def list_metadata_formats(self, identifier: str | None = None) -> Iterator[OAIResponse | MetadataFormat]:
def list_metadata_formats(self, identifier: str | None = None) -> Iterator[Response | MetadataFormat]:
"""Issue a ListMetadataFormats request to the OAI server.
Send a request to list the metadata formats available from the OAI server. This can be done for the entire
Expand All @@ -377,10 +360,9 @@ def list_metadata_formats(self, identifier: str | None = None) -> Iterator[OAIRe
Raises:
idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server.
noMetadataFormats: If there are no metadata formats available for the requested record or repository.
"""
_query = {
"verb": "ListMetadataFormats",
"verb": Verb.LIST_METADATA_FORMATS.value,
"identifier": identifier,
}
query = remove_none_values(_query)
Expand Down
Loading

0 comments on commit 4f7a02a

Please sign in to comment.