feat: set up client (#64)

Fixes #2, fixes #15, fixes #16
afuetterer · May 23, 2024 · 62300bc · 62300bc
1 parent 141208d
commit 62300bc
Show file tree

Hide file tree

Showing 10 changed files with 2,047 additions and 1 deletion.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -13,7 +13,8 @@ ci:
 # Exclude changelog: auto-generated by python-semantic-release
 exclude: |
   (?x)^(
-      /cassettes/|
+      tests/cassettes/repository.yaml|
+      tests/cassettes/repositories.yaml|
       CHANGELOG.md
   )$
 

diff --git a/README.md b/README.md
@@ -15,6 +15,43 @@
 of Research Data Repositories) [REST API](https://www.re3data.org/api/doc), allowing you to easily retrieve and process
 metadata about research data repositories in a convenient and Pythonic way.
 
+```pycon
+>>> import re3data
+>>> response = re3data.repositories.list()
+>>> print(response)
+<?xml version="1.0" encoding="UTF-8"?>
+<list>
+  <repository>
+    <id>r3d100010468</id>
+    <doi>https://doi.org/10.17616/R3QP53</doi>
+    <name>Zenodo</name>
+    <link href="https://www.re3data.org/api/beta/repository/r3d100010468" rel="self" />
+  </repository>
+... (remaining repositories truncated)
+```
+
+```pycon
+>>> response = re3data.repositories.get("r3d100010468")
+>>> print(response)
+<?xml version="1.0" encoding="utf-8"?>
+<!--re3data.org Schema for the Description of Research Data Repositories. Version 2.2, December 2014. doi:10.2312/re3.006-->
+<r3d:re3data xmlns:r3d="http://www.re3data.org/schema/2-2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.re3data.org/schema/2-2 http://schema.re3data.org/2-2/re3dataV2-2.xsd">
+  <r3d:repository>
+    <r3d:re3data.orgIdentifier>r3d100010468</r3d:re3data.orgIdentifier>
+    <r3d:repositoryName language="eng">Zenodo</r3d:repositoryName>
+    <r3d:repositoryURL>https://zenodo.org/</r3d:repositoryURL>
+... (remaining fields truncated)
+```
+
+## Features
+
+- Pythonic API interactions: Interact with the re3data API in a Pythonic way, without having to worry about low-level
+    HTTP requests or XML parsing.
+- Repository metadata retrieval: Easily fetch and process metadata about research data repositories using
+    `re3data.repositories.list()`.
+- Repository details retrieval: Get detailed information about a specific repository using
+    `re3data.repositories.get(repository_id)`.
+
 ## Requirements
 
 [Python](https://www.python.org/downloads/) >= 3.10

diff --git a/docs/src/api.md b/docs/src/api.md
@@ -1 +1,5 @@
 # API Reference
+
+## `Client`
+
+::: re3data.Client
diff --git a/pyproject.toml b/pyproject.toml
@@ -223,6 +223,7 @@ omit = [
 [tool.coverage.report]
 exclude_also = [
   "if TYPE_CHECKING:",
+  "@abstractmethod",
 ]
 fail_under = 90
 show_missing = true

diff --git a/src/re3data/__init__.py b/src/re3data/__init__.py
@@ -5,7 +5,12 @@
 """python-re3data."""
 
 from re3data.__about__ import __version__
+from re3data._client import Client
 
 __all__ = [
     "__version__",
+    "Client",
 ]
+
+# Default client created when the package is imported. Its repository manager is exposed as
+# `re3data.repositories`, so `re3data.repositories.list()` and `re3data.repositories.get(...)`
+# work without instantiating a `Client` explicitly.
+_client = Client()
+repositories = _client.repositories
diff --git a/src/re3data/_client.py b/src/re3data/_client.py
@@ -0,0 +1,156 @@
+# SPDX-FileCopyrightText: 2024 Heinz-Alexander Fütterer
+#
+# SPDX-License-Identifier: MIT
+
+"""The _client module provides a client for interacting with the re3data API."""
+
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+
+import httpx
+
+from re3data import __version__
+
+# Module-level logger; `log_response` writes its debug records through it.
+logger = logging.getLogger(__name__)
+
+# Base URL handed to the httpx client; endpoints such as "repositories" are requested relative to it.
+BASE_URL: str = "https://www.re3data.org/api/beta/"
+# Default headers handed to the httpx client: ask for XML and identify this library and its version.
+DEFAULT_HEADERS: dict[str, str] = {
+    "Accept": "text/xml; charset=utf-8",
+    "User-Agent": f"python-re3data/{__version__}",
+}
+DEFAULT_TIMEOUT = httpx.Timeout(timeout=10.0)  # timeout in seconds
+
+
+def log_response(response: httpx.Response) -> None:
+    """Emit a debug-level log record describing a completed HTTP exchange.
+
+    The record contains the method and URL of the originating request together with the status code of the
+    response. The function is registered as a response event hook on the underlying HTTP client.
+
+    Args:
+        response: The response object received from an HTTP request.
+    """
+    request = response.request
+    logger.debug("[http] Response: %s %s - Status %s", request.method, request.url, response.status_code)
+
+
+class RepositoryManager:
+    """Entry point for the repository endpoints of the re3data API.
+
+    Every method delegates to the `_request` method of the client this manager was created with.
+
+    Attributes:
+        _client: The client used to make requests.
+    """
+
+    def __init__(self, client: Client) -> None:
+        self._client = client
+
+    def list(self, return_type: str = "xml") -> str | httpx.Response:
+        """Retrieve the metadata listing of all repositories known to the re3data API.
+
+        Args:
+            return_type: The type of response to expect. Defaults to "xml".
+
+        Returns:
+            The response body as a string (if `return_type` is "xml") or the full response object.
+        """
+        endpoint = "repositories"
+        return self._client._request(endpoint, return_type)
+
+    def get(self, repository_id: str, return_type: str = "xml") -> str | httpx.Response:
+        """Retrieve the metadata of a single repository.
+
+        Args:
+            repository_id: The identifier of the repository to retrieve.
+            return_type: The type of response to expect. Defaults to "xml".
+
+        Returns:
+            The response body as a string (if `return_type` is "xml") or the full response object.
+        """
+        endpoint = f"repository/{repository_id}"
+        return self._client._request(endpoint, return_type)
+
+
+class BaseClient(ABC):
+    """An abstract base class for clients that interact with the re3data API.
+
+    The constructor instantiates the given httpx client class with the module-level base URL, default headers
+    and default timeout, enables redirect following, and registers a response hook that logs every response.
+    Subclasses must implement `_request`.
+
+    Attributes:
+        _client: The underlying httpx client instance.
+    """
+
+    def __init__(
+        self,
+        client: type[httpx.Client] | type[httpx.AsyncClient],
+    ) -> None:
+        """Create the underlying HTTP client.
+
+        Args:
+            client: The httpx client class to instantiate, either `httpx.Client` or `httpx.AsyncClient`.
+        """
+
+        async def _log_response_async(response: httpx.Response) -> None:
+            log_response(response)
+
+        # httpx awaits the event hooks of an AsyncClient, so the synchronous `log_response` can only be
+        # registered directly on a synchronous client; async clients get the awaitable wrapper instead.
+        is_async = issubclass(client, httpx.AsyncClient)
+        response_hook = _log_response_async if is_async else log_response
+        self._client = client(
+            base_url=BASE_URL,
+            headers=DEFAULT_HEADERS,
+            timeout=DEFAULT_TIMEOUT,
+            follow_redirects=True,
+            event_hooks={"response": [response_hook]},
+        )
+
+    @abstractmethod
+    def _request(self, endpoint: str, return_type: str) -> str | httpx.Response:
+        """Send a request to `endpoint` and return the result in the form selected by `return_type`.
+
+        Args:
+            endpoint: The endpoint to send the request to.
+            return_type: The type of response to expect.
+
+        Returns:
+            A string representation of the response or the full response object.
+        """
+
+
+class Client(BaseClient):
+    """A client that interacts with the re3data API.
+
+    Attributes:
+        _client: The underlying HTTP client.
+        _repository_manager: The repository manager to retrieve metadata from the repositories endpoints.
+
+    Examples:
+        >>> client = Client()
+        >>> response = client.repositories.list()
+        >>> print(response)
+        <?xml version="1.0" encoding="UTF-8"?>
+        <list>
+          <repository>
+            <id>r3d100010468</id>
+            <doi>https://doi.org/10.17616/R3QP53</doi>
+            <name>Zenodo</name>
+            <link href="https://www.re3data.org/api/beta/repository/r3d100010468" rel="self" />
+          </repository>
+        ... (remaining repositories truncated)
+    """
+
+    _client: httpx.Client
+
+    def __init__(self) -> None:
+        """Create a synchronous client backed by `httpx.Client` and attach its repository manager."""
+        super().__init__(httpx.Client)
+        self._repository_manager: RepositoryManager = RepositoryManager(self)
+
+    def _request(self, endpoint: str, return_type: str) -> str | httpx.Response:
+        """Send a HTTP GET request to the specified endpoint.
+
+        Args:
+            endpoint: The endpoint to send the request to.
+            return_type: The type of response to expect, either "xml" or "response".
+
+        Returns:
+            A string representation of the response (if `return_type` is "xml") or the full response object
+            (if `return_type` is "response").
+
+        Raises:
+            ValueError: If an invalid `return_type` is provided. Raised before any request is sent.
+            httpx.RequestError: If the request fails or times out.
+            httpx.HTTPStatusError: If the server responds with an error status code.
+        """
+        # Reject a bad argument up front so that no network round trip is spent on a call that cannot succeed.
+        if return_type not in ("xml", "response"):
+            raise ValueError(f"Invalid `return_type`: {return_type}. Expected one of: `xml`, `response`.")
+        response = self._client.get(endpoint)
+        response.raise_for_status()
+        if return_type == "xml":
+            return response.text
+        return response
+
+    @property
+    def repositories(self) -> RepositoryManager:
+        """Get the repository manager for this client.
+
+        Returns:
+            The repository manager.
+        """
+        return self._repository_manager