diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml new file mode 100644 index 0000000..74a3f45 --- /dev/null +++ b/.github/workflows/gh-pages.yml @@ -0,0 +1,58 @@ +name: Build and deploy gh-pages branch with Mkdocs + +on: + push: + # Runs only if documentation is changed + paths: + - 'mkdocs.yml' + - 'docs/**' + # Combined with the paths filter above: runs only for pushes to the main branch + branches: ["main"] + # Runs every time a PR is opened or updated against main + pull_request: + branches: ["main"] + workflow_dispatch: + +concurrency: + # Prevent 2+ copies of this workflow from running concurrently + group: dts-docs-action + +jobs: + Build-and-Deploy-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + show-progress: false + fetch-depth: 0 # Needed, or else gh-pages won't be fetched, and push rejected + submodules: false # speeds up clone and not building anything in submodules + + - name: Show action trigger + run: echo "= The job was automatically triggered by a ${{github.event_name}} event." 
+ + - name: Setting up Python 3.12 + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Installing dtspy dependencies + run: python3 -m pip install -r requirements.txt + + - name: Build + run: mkdocs build --verbose + + - if: ${{ github.event_name == 'push' }} + name: GitHub Pages action + uses: JamesIves/github-pages-deploy-action@v4 + with: + # Do not remove existing pr-preview pages + clean-exclude: pr-preview + folder: ./site/ + + # If it's a PR from within the same repo, deploy to a preview page + # For security reasons, PRs from forks cannot write into gh-pages for now + - if: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository }} + name: Preview docs + uses: rossjrw/pr-preview-action@v1 + with: + source-dir: ./site/ diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..8947612 --- /dev/null +++ b/docs/api.md @@ -0,0 +1 @@ +::: dts diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..00359b3 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,2 @@ +# dtspy: a Python client for the [Data Transfer System](https://kbase.github.io/dts/) + diff --git a/dts/__init__.py b/dts/__init__.py index 51747b9..a1500e3 100644 --- a/dts/__init__.py +++ b/dts/__init__.py @@ -1,4 +1,13 @@ -"""dts: a client for the Data Transfer Service""" +"""dts: a Python client for the Data Transfer Service. + +The [Data Transfer System (DTS)](https://kbase.github.io/dts/) offers a federated +search capability for participating organizations in the DOE Biological and +Environmental Research program, and allows the transfer of related data and +metadata between these organizations. + +DTS API documentation is available [here](https://lb-dts.staging.kbase.us/docs#/). 
+ +""" from .database import Database from .transfer_status import TransferStatus diff --git a/dts/client.py b/dts/client.py index d84265e..89eabf4 100644 --- a/dts/client.py +++ b/dts/client.py @@ -11,30 +11,48 @@ from urllib.error import ( HTTPError, ) +from typing import Any logger = logging.getLogger('dts') api_version = 1 class KBaseAuth(AuthBase): """Attaches a KBase-sensible Authorization header to the given Request object.""" - def __init__(self, api_key): + def __init__(self: "KBaseAuth", api_key: str) -> None: self.api_key = api_key - def __call__(self, request): + def __call__(self: "KBaseAuth", request): b64_token = base64.b64encode(bytes(self.api_key + '\n', 'utf-8')) token = b64_token.decode('utf-8') request.headers['Authorization'] = f'Bearer {token}' return request class Client: - """`Client`: A client for performing file transfers with the Data Transfer System""" - def __init__(self, - api_key = None, - server = None, - port = None): - """`Client(server = None, port = None, api_key = None)` -> DTS client. - -* If no `server` is given, you must call `connect` on the created client.""" + """`dts.Client`: A client for performing file transfers with the Data Transfer System (DTS). + +This type exposes the [DTS API](https://lb-dts.staging.kbase.us/docs#/) for use +in Python programs. +""" + def __init__(self: "Client", + api_key: str | None = None, + server: str | None = None, + port: int | None = None) -> None: + """Creates a DTS client that handles search and transfer requests via +a connected server. + +If no server is specified, you must call `connect` on the created client. + +Args: + api_key: An unencoded KBase developer token. + server: The DTS server that handles the client's API requests. + port: The port to which the client connects with the server. + +Returns: + a `dts.Client` instance. + +Raises: + TypeError: an argument of improper type was specified. 
+""" if server: self.connect(server = server, port = port, api_key = api_key) else: @@ -42,14 +60,22 @@ def __init__(self, self.name = None self.version = None - def connect(self, - api_key = None, - server = None, - port = None): - """`client.connect(api_key = None, server = None, port = None)` + def connect(self: "Client", + api_key: str | None = None, + server: str | None = None, + port: int | None = None) -> None: + """Connects the client to the given DTS server via the given port using the given +(unencoded) KBase developer token. + +Args: + api_key: An unencoded KBase developer token. + server: The DTS server that handles the client's API requests. + port: The port to which the client connects with the server. -* Connects the client to the given DTS `server` via the given `port` using the given - (unencoded) `api_key`.""" +Raises: + TypeError: an argument of improper type was specified. + urllib3.exceptions.ConnectionError: the client was unable to connect to the DTS server. +""" if not isinstance(api_key, str): raise TypeError('api_key must be an unencoded API key.') if not isinstance(server, str): @@ -69,19 +95,22 @@ def connect(self, self.name = result['name'] self.version = result['version'] - def disconnect(self): - """`client.disconnect() -> None - -* disconnects the client from the server.""" + def disconnect(self: "Client") -> None: + """Disconnects the client from the server. +""" self.api_key = None self.uri = None self.name = None self.version = None - def databases(self): - """`client.databases()` -> `list` of `Database` objects + def databases(self: "Client") -> list[Database]: + """Returns all databases available to the service. + +Server-side errors are captured and logged. -* Returns all databases available to the service, or `None` if an error occurs.""" +Returns: + A list of Database objects containing information about available databases. 
+""" if not self.uri: raise RuntimeError('dts.Client: not connected.') try: @@ -89,57 +118,55 @@ def databases(self): response.raise_for_status() except HTTPError as http_err: logger.error(f'HTTP error occurred: {http_err}') - return None + return [] except Exception as err: logger.error(f'Other error occurred: {err}') - return None + return [] results = response.json() return [Database(id = r['id'], name = r['name'], organization = r['organization'], url = r['url']) for r in results] - def search(self, - database = None, - query = None, - status = None, - offset = 0, - limit = None, - specific = None, - ): - """ -`client.search(database = None, - query = None, - status = None, - offset = 0, - limit = None, - specific = None) -> `list` of `frictionless.DataResource` objects - -* Performs a synchronous search of the database with the given name using the - given query string. -Optional arguments: - * query: a search string that is directly interpreted by the database - * status: filters for files based on their status: - * `"staged"` means "search only for files that are already in the source database staging area" - * `"unstaged"` means "search only for files that are not staged" - * offset: a 0-based index from which to start retrieving results (default: 0) - * limit: if given, the maximum number of results to retrieve - * specific: a dictionary mapping database-specific search parameters to their values + def search(self: "Client", + database: str, + query: str | int | float, + status: str | None = None, + offset: int = 0, + limit: int | None = None, + specific: dict[str, Any] | None = None, + ) -> list[JsonResource]: + """Performs a synchronous search of the database with the given name using the given query string. + +This method searches the indicated database for files that can be transferred. + +Args: + database: A string containing the name of the database to search. + query: A search string that is directly interpreted by the database. 
+ status: An optional string (`"staged"` or `"unstaged"`) indicating whether files are filtered based on their status. + offset: An optional 0-based pagination index indicating the first retrieved result (default: 0). + limit: An optional pagination parameter indicating the maximum number of results to retrieve. + specific: An optional dictionary mapping database-specific search parameters to their values. + +Returns: + A list of [frictionless DataResources](https://specs.frictionlessdata.io/data-resource/) containing metadata for files matching the query. + +Raises: + RuntimeError: Indicates an issue with the DTS client and its connection to the server. + TypeError: Indicates that an argument passed to the client isn't of the proper type. + ValueError: Indicates that an argument passed to the client has an invalid value. """ if not self.uri: raise RuntimeError('dts.Client: not connected.') - if query: - if not isinstance(query, str): - # we also accept numeric values - if isinstance(query, int) or isinstance(query, float): - query = str(query) - else: - raise RuntimeError('search: query must be a string or a number.') - else: - raise RuntimeError('search: missing query.') + if not isinstance(query, str): + # we also accept numeric values + if isinstance(query, int) or isinstance(query, float): + query = str(query) + else: + raise TypeError('search: query must be a string or a number.') if not isinstance(database, str): raise TypeError('search: database must be a string.') - params = { + params: dict[str, Any] = { 'database': database, 'query': query, } @@ -170,29 +197,34 @@ def search(self, response.raise_for_status() except (HTTPError, requests.exceptions.HTTPError) as err: logger.error(f'HTTP error occurred: {err}') - return None + return [] except Exception as err: logger.error(f'Other error occurred: {err}') - return None + return [] return [JsonResource(r) for r in response.json()['resources']] - def fetch_metadata(self, - database = None, - ids = None, - 
offset = 0, - limit = None, - ): - """ -`client.fetch_metadata(database = None, - ids = None, - offset = 0, - limit = None) -> `list` of `frictionless.DataResource` objects - -* Fetches metadata for the files with the specified IDs within the specified - database. -Optional arguments: - * offset: a 0-based index from which to start retrieving results (default: 0) - * limit: if given, the maximum number of results to retrieve + def fetch_metadata(self: "Client", + database: str, + ids: list[str], + offset: int = 0, + limit: int | None = None) -> list[JsonResource]: + """Fetches metadata for the files with the specified IDs within the specified database. + +Server-side errors are intercepted and logged. + +Args: + database: A string containing the name of the database to search. + ids: A list containing file identifiers for which metadata is retrieved. + offset: An optional 0-based pagination index from which to start retrieving results (default: 0). + limit: An optional pagination parameter indicating the maximum number of results to retrieve. + +Returns: + A list of [frictionless DataResources](https://specs.frictionlessdata.io/data-resource/) containing metadata for files with the requested IDs. + +Raises: + RuntimeError: Indicates an issue with the DTS client and its connection to the server. + TypeError: Indicates that an argument passed to the client isn't of the proper type. + ValueError: Indicates that an argument passed to the client has an invalid value. 
""" if not self.uri: raise RuntimeError('dts.Client: not connected.') @@ -200,7 +232,7 @@ def fetch_metadata(self, raise RuntimeError('search: missing or invalid file IDs.') if not isinstance(database, str): raise TypeError('search: database must be a string.') - params = { + params: dict[str, Any] = { 'database': database, 'ids': ','.join(ids), } @@ -223,32 +255,38 @@ def fetch_metadata(self, response.raise_for_status() except (HTTPError, requests.exceptions.HTTPError) as err: logger.error(f'HTTP error occurred: {err}') - return None + return [] except Exception as err: logger.error(f'Other error occurred: {err}') - return None + return [] return [JsonResource(r) for r in response.json()['resources']] - def transfer(self, - file_ids = None, - source = None, - destination = None, - description = None, - timeout = None): - """ -`client.transfer(file_ids = None, - source = None, - destination = None, - description = None, - instructions = None, - timeout = None) -> UUID - -* Submits a request to transfer files from a source to a destination database. the - files in the source database are identified by a list of string file_ids. -Optional arguments: - * description: a string containing Markdown text describing the transfer - * instructions: a dict representing a JSON object containing instructions - for processing the payload at its destination + def transfer(self: "Client", + file_ids: list[str], + source: str, + destination: str, + description: str | None = None, + instructions: dict[str, Any] | None = None, + timeout: int | None = None) -> uuid.UUID | None: + """Submits a request to transfer files from a source to a destination database. + +Server-side errors are intercepted and logged. + +Args: + file_ids: A list of identifiers for files to be transferred. + source: The name of the database from which files are transferred. + destination: The name of the database to which files are transferred. 
+ description: An optional string containing human-readable Markdown text describing the transfer. + instructions: An optional dict representing a JSON object containing instructions for processing the payload at its destination. + timeout: An optional integer indicating the number of seconds to wait for a response from the server. + +Returns: + A UUID uniquely identifying the file transfer that can be used to check its status, or None if a server-side error is encountered. + +Raises: + RuntimeError: Indicates an issue with the DTS client and its connection to the server. + TypeError: Indicates that an argument passed to the client isn't of the proper type. + ValueError: Indicates that an argument passed to the client has an invalid value. """ if not self.uri: raise RuntimeError('dts.Client: not connected.') @@ -287,20 +325,33 @@ def transfer(self, return None return uuid.UUID(response.json()["id"]) - def transfer_status(self, id): - """`client.transfer_status(id)` -> TransferStatus - -* Returns status information for the transfer with the given identifier. - Possible statuses are: - * `'staging'`: the files requested for transfer are being copied to the staging - area for the source database job - * `'active'`: the files are being transferred from the source database to the - destination database - * `'finalizing'`: the files have been transferred and a manifest is being written - * `'inactive'`: the file transfer has been suspended - * `'failed'`: the file transfer could not be completed because of a failure` - * `'unknown'`: the status of the given transfer is unknown -* If an error is encountered, returns `None`.""" + def transfer_status(self: "Client", + id: uuid.UUID) -> TransferStatus | None: + """Returns status information for the transfer with the given identifier. + + +Server-side errors are intercepted and logged. 
Possible transfer statuses are: + +* `'staging'`: The files requested for transfer are being copied to the staging + area for the source database job. +* `'active'`: The files are being transferred from the source database to the + destination database. +* `'finalizing'`: The files have been transferred and a manifest is being written. +* `'inactive'`: The file transfer has been suspended. +* `'failed'`: The file transfer could not be completed because of a failure. +* `'unknown'`: The status of the given transfer is unknown. + +Args: + id: A UUID that uniquely identifies the transfer operation for which the status is requested. + +Returns: + A `TransferStatus` object whose contents indicate the status of the transfer, or None if a server-side error occurs. + +Raises: + RuntimeError: Indicates an issue with the DTS client and its connection to the server. + TypeError: Indicates that an argument passed to the client isn't of the proper type. + ValueError: Indicates that an argument passed to the client has an invalid value. +""" if not self.uri: raise RuntimeError('dts.Client: not connected.') try: @@ -308,7 +359,7 @@ def transfer_status(self, id): auth=self.auth) response.raise_for_status() except (HTTPError, requests.exceptions.HTTPError) as err: - logger.error(f'HTTP error occurred: {http_err}') + logger.error(f'HTTP error occurred: {err}') return None except Exception as err: logger.error(f'Other error occurred: {err}') @@ -322,11 +373,20 @@ def transfer_status(self, id): num_files_transferred = results.get('num_files_transferred'), ) - def cancel_transfer(self, id): - """ -`client.cancel_transfer(id) -> None + def cancel_transfer(self: "Client", + id: uuid.UUID) -> None: + """Cancels a file transfer with the requested UUID. + +Status information for the cancelled transfer is retained for a time so its +cancellation can be seen. + +Args: + id: A UUID that uniquely identifies the transfer operation to be cancelled. 
-* Deletes a file transfer, canceling +Raises: + RuntimeError: Indicates an issue with the DTS client and its connection to the server. + TypeError: Indicates that an argument passed to the client isn't of the proper type. + ValueError: Indicates that an argument passed to the client has an invalid value. """ if not self.uri: raise RuntimeError('dts.Client: not connected.') @@ -335,14 +395,14 @@ def cancel_transfer(self, id): auth=self.auth) response.raise_for_status() except (HTTPError, requests.exceptions.HTTPError) as err: - logger.error(f'HTTP error occurred: {http_err}') + logger.error(f'HTTP error occurred: {err}') return None except Exception as err: logger.error(f'Other error occurred: {err}') return None return None - def __repr__(self): + def __repr__(self: "Client") -> str: if self.uri: return f""" dts.Client(uri = {self.uri}, diff --git a/dts/database.py b/dts/database.py index 5b29b12..255109b 100644 --- a/dts/database.py +++ b/dts/database.py @@ -1,8 +1,13 @@ from dataclasses import dataclass @dataclass(slots = True) -class Database(object): - """`Database` - A database storing files that can be selected and transferred""" +class Database: + """`Database` - A database storing files that can be selected and transferred. + +This type holds human-readable information about databases available to DTS. +Objects of this type are returned by calls to the DTS API, so it is not +necessary to construct them directly. +""" id: str name: str organization: str diff --git a/dts/transfer_status.py b/dts/transfer_status.py index c2dd588..756ba23 100644 --- a/dts/transfer_status.py +++ b/dts/transfer_status.py @@ -2,8 +2,13 @@ from typing import Optional @dataclass(slots = True) -class TransferStatus(object): - """`TransferStatus` - holds status information for a file transfer""" +class TransferStatus: + """`TransferStatus` - Status information for a file transfer. + +This type holds information pertaining to the transfer of a payload initiated +via the DTS. 
Objects of this type are returned by calls to the DTS API, so it +is not necessary to create them directly. +""" id: str status: str message: Optional[str] diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..6f3daf8 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,11 @@ +site_name: dtspy - A Python client for the Data Transfer Service + +theme: + name: "material" + +plugins: + - mkdocstrings + +nav: + - Overview: index.md + - API reference: api.md diff --git a/requirements.txt b/requirements.txt index bc090c0..5d36b0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,29 +1,49 @@ annotated-types==0.7.0 attrs==23.2.0 -certifi==2024.7.4 +babel==2.16.0 +certifi==2024.2.2 chardet==5.2.0 charset-normalizer==3.3.2 click==8.1.7 -coverage==7.6.0 +colorama==0.4.6 +coverage==7.6.3 frictionless==5.17.0 +ghp-import==2.1.0 +griffe==1.4.1 humanize==4.9.0 idna==3.7 isodate==0.6.1 Jinja2==3.1.4 jsonschema==4.22.0 jsonschema-specifications==2023.12.1 +Markdown==3.7 markdown-it-py==3.0.0 marko==2.0.3 MarkupSafe==2.1.5 mdurl==0.1.2 +mergedeep==1.3.4 +mkdocs==1.6.1 +mkdocs-autorefs==1.2.0 +mkdocs-get-deps==0.2.0 +mkdocs-material==9.5.40 +mkdocs-material-extensions==1.3.1 +mkdocstrings==0.26.2 +mkdocstrings-python==1.12.1 +packaging==24.1 +paginate==0.5.7 +pathspec==0.12.1 petl==1.7.15 +platformdirs==4.3.6 pydantic==2.7.2 pydantic_core==2.18.3 Pygments==2.18.0 +pymdown-extensions==10.11.2 python-dateutil==2.9.0.post0 python-slugify==8.0.4 PyYAML==6.0.1 +pyyaml_env_tag==0.1 referencing==0.35.1 +regex==2024.9.11 requests==2.32.3 rfc3986==2.0.0 rich==13.7.1 @@ -36,6 +56,7 @@ tabulate==0.9.0 text-unidecode==1.3 typer==0.12.3 typing_extensions==4.12.0 -urllib3==2.2.2 +urllib3==2.2.1 uuid==1.30 validators==0.28.3 +watchdog==5.0.3