kbase · jeff-cohere · Aug 14, 2024 · Jun 5, 2024 · Jun 6, 2024 · Jun 10, 2024
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,48 @@
+name: tests
+
+# This workflow is triggered:
+# 1. when a pull request targeting the main branch is opened or updated
+# 2. when commits are pushed to the main branch (e.g. when a pull request is merged)
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    container: ${{ matrix.container }}
+
+    # we support Linux and macOS
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+
+    # Steps for running tests and analysis.
+    steps:
+      - name: Checking out repository (${{ matrix.os }})
+        uses: actions/checkout@v4
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          submodules: recursive
+
+      - name: Setting up Python 3.10 (${{ matrix.os }})
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Installing dtspy dependencies (${{ matrix.os }})
+        run: python3 -m pip install -r requirements.txt
+
+      - name: Running tests (${{ matrix.os }})
+        run: coverage run -m unittest discover
+        env:
+          DTS_KBASE_DEV_TOKEN: ${{ secrets.DTS_KBASE_DEV_TOKEN }}
+
+      # add this when ready
+      #- if: ${{ matrix.os == 'ubuntu-latest' }}
+      #  name: Uploading coverage report to codecov.io
+      #  uses: codecov/codecov-action@v4
+      #  with:
+      #    token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/README.md b/README.md
@@ -1,2 +1,5 @@
 # dtspy
+
+![Tests](https://github.com/kbase/dtspy/actions/workflows/tests.yml/badge.svg)
+
 Python client for the Data Transfer Service
diff --git a/dts/client.py b/dts/client.py
@@ -1,6 +1,5 @@
 import base64
 from frictionless.resources import JsonResource
-import io
 import requests
 from requests.auth import AuthBase
 import logging
@@ -22,8 +21,9 @@ def __init__(self, api_key):
         self.api_key = api_key
 
     def __call__(self, r):
-        token = base64.b64encode(bytes(self.api_key + '\n', 'utf-8'))
-        r.headers['Authorization'] = f'Bearer {token.decode('utf-8')}'
+        b64_token = base64.b64encode(bytes(self.api_key + '\n', 'utf-8'))
+        token = b64_token.decode('utf-8')
+        r.headers['Authorization'] = f'Bearer {token}'
         return r
 
 class Client(object):
@@ -39,6 +39,8 @@ def __init__(self,
             self.connect(server = server, port = port, api_key = api_key)
         else:
             self.uri = None
+            self.name = None
+            self.version = None
 
     def connect(self,
                 api_key = None,
@@ -104,47 +106,69 @@ def search(self,
                status = None,
                offset = 0,
                limit = None,
+               specific = None,
     ):
         """
 `client.search(database = None,
                query = None,
                status = None,
                offset = 0,
-               limit = None) -> `list` of `frictionless.DataResource` objects
+               limit = None,
+               specific = None) -> `list` of `frictionless.DataResource` objects
 
 * Performs a synchronous search of the database with the given name using the
   given query string.
 Optional arguments:
+    * query: a search string that is directly interpreted by the database
     * status: filters for files based on their status:
         * `"staged"` means "search only for files that are already in the source database staging area"
-        * `"archived"` means "search only for files that are archived and not staged"
+        * `"unstaged"` means "search only for files that are not staged"
     * offset: a 0-based index from which to start retrieving results (default: 0)
     * limit: if given, the maximum number of results to retrieve
+    * specific: a dictionary mapping database-specific search parameters to their values
 """
         if not self.uri:
             raise RuntimeError('dts.Client: not connected.')
+        if type(query) != str:
+            raise RuntimeError('search: missing or invalid query.')
         if type(database) != str:
             raise TypeError('search: database must be a string.')
+        if status and status not in ['staged', 'unstaged']:
+            raise TypeError(f'search: invalid status: {status}.')
         if type(offset) != int or offset < 0:
-            raise TypeError('search: invalid offset: %s.'%offset)
+            raise TypeError(f'search: invalid offset: {offset}.')
         if limit:
             if type(limit) != int:
                 raise TypeError('search: limit must be an int.')
             elif limit < 1:
                 raise TypeError(f'search: invalid number of retrieved results: {N}')
+        if specific and type(specific) != dict:
+            raise TypeError('search: specific must be a dict.')
         try:
             params = {
                 'database': database,
                 'query':    query,
-                'status':   status,
-                'offset':   offset,
-                'limit':    limit,
             }
-            response = requests.get(url=f'{self.uri}/files', params=params, auth=self.auth)
+            for name in ['status', 'offset', 'limit']:
+                val = eval(name)
+                if val:
+                    params[name] = val
+            if specific:
+                params['specific'] = specific
+                response = requests.post(url=f'{self.uri}/files',
+                                         json=params,
+                                         auth=self.auth)
+            else:
+                response = requests.get(url=f'{self.uri}/files',
+                                        params=params,
+                                        auth=self.auth)
             response.raise_for_status()
         except HTTPError as http_err:
             logger.error(f'HTTP error occurred: {http_err}')
             return None
+        except requests.exceptions.HTTPError as err:
+            logger.error(f'HTTP error occurred: {err}')
+            return None
         except Exception as err:
             logger.error(f'Other error occurred: {err}')
             return None
@@ -154,11 +178,13 @@ def search(self,
     def transfer(self,
                  file_ids = None,
                  source = None,
-                 destination = None):
+                 destination = None,
+                 timeout = None):
         """
 `client.transfer(file_ids = None,
                  source = None,
-                 destination = None) -> UUID
+                 destination = None,
+                 timeout = None) -> UUID
 
 * Submits a request to transfer files from a source to a destination database. the
   files in the source database are identified by a list of string file_ids.
@@ -170,26 +196,33 @@ def transfer(self,
         if type(destination) != str:
             raise TypeError('transfer: destination database name must be a string.')
         if type(file_ids) != list:
-            raise TypeError('batch: sequences must be a list of string file IDs.')
+            raise TypeError('transfer: file_ids must be a list of string file IDs.')
+        if timeout and type(timeout) != int and type(timeout) != float:
+            raise TypeError('transfer: timeout must be a number of seconds.')
         try:
-            response = requests.post(f'{self.uri}/transfers',
-                                     data={
-                                         source:      source,
-                                         destination: destination,
-                                         file_ids:    file_ids,
-                                     })
+            response = requests.post(url=f'{self.uri}/transfers',
+                                     json={
+                                         'source':      source,
+                                         'destination': destination,
+                                         'file_ids':    file_ids,
+                                     },
+                                     auth=self.auth,
+                                     timeout=timeout)
             response.raise_for_status()
         except HTTPError as http_err:
             logger.error(f'HTTP error occurred: {http_err}')
             return None
+        except requests.exceptions.HTTPError as err:
+            logger.error(f'HTTP error occurred: {err}')
+            return None
         except Exception as err:
             logger.error(f'Other error occurred: {err}')
             return None
         else:
             return uuid.UUID(response.json()["id"])
 
-    def transferStatus(self, id):
-        """`client.transferStatus(id)` -> TransferStatus
+    def transfer_status(self, id):
+        """`client.transfer_status(id)` -> TransferStatus
 
 * Returns status information for the transfer with the given identifier.
   Possible statuses are:
@@ -205,7 +238,8 @@ def transferStatus(self, id):
         if not self.uri:
             raise RuntimeError('dts.Client: not connected.')
         try:
-            response = requests.get(f'{self.uri}/transfers/{str(id)}')
+            response = requests.get(url=f'{self.uri}/transfers/{str(id)}',
+                                    auth=self.auth)
             response.raise_for_status()
         except HTTPError as http_err:
             logger.error(f'HTTP error occurred: {http_err}')
@@ -216,23 +250,24 @@ def transferStatus(self, id):
         else:
             results = response.json()
             return TransferStatus(
-                id                    = response['id'],
-                status                = response['status'],
-                message               = response['message'] if 'message' in response else None,
-                num_files             = response['num_files'],
-                num_files_transferred = response['num_files_transferred'],
+                id                    = results['id'],
+                status                = results['status'],
+                message               = results['message'] if 'message' in results else None,
+                num_files             = results['num_files'],
+                num_files_transferred = results['num_files_transferred'],
             )
 
-    def deleteTransfer(self, id):
+    def cancel_transfer(self, id):
         """
-`client.deleteTransfer(id) -> None
+`client.cancel_transfer(id) -> None
 
 * Deletes a file transfer, canceling
 """
         if not self.uri:
             raise RuntimeError('dts.Client: not connected.')
         try:
-            response = requests.delete(f'{self.uri}/transfers/{str(id)}')
+            response = requests.delete(url=f'{self.uri}/transfers/{str(id)}',
+                                       auth=self.auth)
             response.raise_for_status()
         except HTTPError as http_err:
             logger.error(f'HTTP error occurred: {http_err}')

diff --git a/requirements.txt b/requirements.txt
@@ -4,6 +4,7 @@ certifi==2024.2.2
 chardet==5.2.0
 charset-normalizer==3.3.2
 click==8.1.7
+coverage==7.6.0
 frictionless==5.17.0
 humanize==4.9.0
 idna==3.7

diff --git a/test/__init__.py b/test/__init__.py
diff --git a/test/test_client.py b/test/test_client.py
@@ -0,0 +1,65 @@
+# Unit tests for the dts.client module
+
+import dts
+import os
+import unittest
+
+class TestClient(unittest.TestCase):
+    """Unit tests for dts.client.Client"""
+
+    def setUp(self):
+        self.token = os.getenv('DTS_KBASE_DEV_TOKEN')
+        if not self.token:
+            raise ValueError('Environment variable DTS_KBASE_DEV_TOKEN must be set!')
+        self.server = "https://lb-dts.staging.kbase.us"
+
+    def test_ctor(self):
+        client = dts.Client(api_key = self.token, server = self.server)
+        self.assertTrue(client.uri)
+        self.assertTrue(client.name)
+        self.assertTrue(client.version)
+
+    def test_connect(self):
+        client = dts.Client()
+        self.assertFalse(client.uri)
+        self.assertFalse(client.name)
+        self.assertFalse(client.version)
+        client.connect(api_key = self.token, server = self.server)
+        self.assertTrue(client.uri)
+        self.assertTrue(client.name)
+        self.assertTrue(client.version)
+        client.disconnect()
+        self.assertFalse(client.uri)
+        self.assertFalse(client.name)
+        self.assertFalse(client.version)
+
+    def test_databases(self):
+        client = dts.Client(api_key = self.token, server = self.server)
+        dbs = client.databases()
+        self.assertTrue(isinstance(dbs, list))
+        self.assertEqual(2, len(dbs))
+        self.assertTrue(any([db.id == 'jdp' for db in dbs]))
+        self.assertTrue(any([db.id == 'kbase' for db in dbs]))
+
+    def test_basic_jdp_search(self):
+        client = dts.Client(api_key = self.token, server = self.server)
+        results = client.search(database = 'jdp', query = '3300047546')
+        self.assertTrue(isinstance(results, list))
+        self.assertTrue(len(results) > 0)
+        self.assertTrue(all([result.to_dict()['id'].startswith('JDP:')
+                             for result in results]))
+
+    def test_jdp_search_for_taxon_oid(self):
+        client = dts.Client(api_key = self.token, server = self.server)
+        taxon_oid = '2582580701'
+        params = {'f': 'img_taxon_oid', 'extra': 'img_taxon_oid'}
+        results = client.search(database = 'jdp',
+                                query = taxon_oid,
+                                specific = params)
+        self.assertTrue(isinstance(results, list))
+        self.assertTrue(len(results) > 0)
+        self.assertTrue(any([result.to_dict()['extra']['img_taxon_oid'] == int(taxon_oid)
+                             for result in results]))
+
+if __name__ == '__main__':
+    unittest.main()