
SCT-3141 Add support for config repo GitHub release workflow #408

Closed
wants to merge 22 commits into from
Changes from 18 commits
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -35,6 +35,7 @@ typing-extensions = "==4.2.0"
types-pyyaml = "==6.0.7"
types-requests = "==2.27.20"
shell = "==1.0.1"
pygithub = "==1.55"

[requires]
python_version = "3.7"
316 changes: 252 additions & 64 deletions Pipfile.lock

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions RELEASE_NOTES.md
@@ -2,6 +2,10 @@

## Unreleased

## 0.2.6

* add option to specify a repo and asset name for the validation config, rather than a URL; the asset is downloaded via the GitHub API

## 0.2.5

* organize documentation under a single `docs` directory; add some additional documentation and placeholders [SAM-209]
18 changes: 15 additions & 3 deletions deploy.cfg.tmpl
@@ -67,9 +67,21 @@ data-link-collection = {{ default .Env.data_link_collection "samples_data_link"
workspace-object-version-shadow-collection = {{ default .Env.workspace_object_version_shadow_collection "ws_object_version" }}
schema-collection = {{ default .Env.schema_collection "samples_schema" }}

# A URL pointing to a configuration file for any metadata validators to be installed on startup.
# See the readme file for a description of the file contents.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "https://raw.githubusercontent.com/kbase/sample_service_validator_config/master/metadata_validation.yml" }}
# A relative GitHub path pointing to a configuration repo for any metadata validators to be installed on startup.
# In the form of a GitHub repo path, e.g. `kbase/sample_service_validator_config`.
# See the readme file for a description of the repo contents.
metadata-validator-config-repo = {{ default .Env.metadata_validator_config_repo "kbase/sample_service_validator_config" }}
# Release asset filename to use for the metadata validator config.
metadata-validator-config-filename = {{ default .Env.metadata_validator_config_filename "metadata_validation.yml" }}
# Optional git tag of a release in the repo; if not provided, the most recent release is used.
metadata-validator-config-release-tag = {{ default .Env.metadata_validator_config_release_tag "" }}
# Whether or not to consider prereleases when looking for the metadata validator config release asset.
metadata-validator-config-prerelease = {{ default .Env.metadata_validator_config_prerelease "false" }}
# Overrides the default metadata validator config repo and provides a direct download URL to the metadata validator config.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "" }}
# Provides a GitHub token for authorizing validation config downloads.
# Raises the GitHub API rate limit dramatically, but is optional for public config repos.
github-token = {{ default .Env.github_token "" }}

# Parameters for Kafka notifications.
#
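For reference, a rendered deploy.cfg block using the new repo-based source might look like the following sketch; the tag and token values are placeholders, not values from this PR:

```ini
; Illustrative rendered output only.
metadata-validator-config-repo = kbase/sample_service_validator_config
metadata-validator-config-filename = metadata_validation.yml
metadata-validator-config-release-tag = 0.4
metadata-validator-config-prerelease = false
metadata-validator-config-url =
github-token = ghp_exampletoken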
2 changes: 1 addition & 1 deletion docs/configuration.md
@@ -10,7 +10,7 @@ Setting `github-token` will help to avoid any rate limiting that may occur (1k/h

The configuration repo should have chronological releases containing a configuration file. This file's name can be specified with `metadata-validator-config-filename` (`metadata_validation.yml` by default).

The most recent release from the specified repo will be loaded. If pre-releases should also be included, set the `metadata-validator-config-prerelease` config variable to 'true'.
If `metadata-validator-config-release-tag` is provided, the config asset will be loaded from the associated release. Otherwise, the most recent release from the specified repo will be loaded. If pre-releases should also be included, set the `metadata-validator-config-prerelease` config variable to 'true'.

A direct file URL override can also be provided with the `metadata-validator-config-url` key. With this form, the URL begins with `file://`, followed by a path to the directory containing the validation config file, which should be named `metadata_validation.yml` (unless overridden as described above). This is utilized by tests.

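The token-authorized asset download described here amounts to a urllib request with an octet-stream Accept header; a minimal sketch follows, where the function name, URL, and token are placeholders for illustration:

```python
import urllib.request
from typing import Optional

def build_asset_request(asset_url: str, token: Optional[str] = None) -> urllib.request.Request:
    # Ask the GitHub API for the raw file bytes rather than the asset's
    # JSON metadata; the Authorization header is optional for public repos.
    req = urllib.request.Request(asset_url)
    req.add_header('Accept', 'application/octet-stream')
    if token:
        req.add_header('Authorization', f'token {token}')
    return req

# Build the request only; no network call is made here.
req = build_asset_request(
    'https://api.github.com/repos/kbase/sample_service_validator_config/releases/assets/1',
    'ghp_exampletoken')
```

Passing the request to `urllib.request.urlopen` would then perform the actual download.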
4 changes: 2 additions & 2 deletions kbase.yml
@@ -8,10 +8,10 @@ service-language:
python

module-version:
0.2.5
0.2.6

owners:
[gaprice, slebras]
[gaprice, slebras, dlyon]

service-config:
dynamic-service: true
4 changes: 2 additions & 2 deletions lib/SampleService/SampleServiceImpl.py
@@ -57,9 +57,9 @@ class SampleService:
# state. A method could easily clobber the state set by another while
# the latter method is running.
######################################### noqa
VERSION = "0.2.5"
VERSION = "0.2.6"
GIT_URL = "[email protected]:kbase/sample_service.git"
GIT_COMMIT_HASH = "b7e68b5768795d77287d8ea6d67d32b21ae34cf9"
GIT_COMMIT_HASH = "67212e3a0fc2700732230df1c187e3fc4a09d9ea"

#BEGIN_CLASS_HEADER
#END_CLASS_HEADER
27 changes: 27 additions & 0 deletions lib/SampleService/core/arg_checkers.py
@@ -107,6 +107,33 @@ def check_string(string: Optional[str], name: str, max_len: int = None, optional
raise IllegalParameterError('{} exceeds maximum length of {}'.format(name, max_len))
return string

def check_bool(boolstring: Optional[str], name: str, optional: bool = False) -> Optional[bool]:
'''
Check and parse a boolean from an input string.
- Accepted true values (case insensitive): 'true', 'yes', 'y', '1'
- Accepted false values (case insensitive): 'false', 'no', 'n', '0'
- Raises if input is None or whitespace only (unless the optional parameter is specified)
- If optional, return None if input is None or whitespace only

:param boolstring: the string to parse/check as a boolean.
:param name: the name of the string to be used in error messages.
:param optional: if True, return None when the input is None or whitespace only.
:returns: the parsed boolean
:raises MissingParameterError: if the string is None or whitespace only.
:raises IllegalParameterError: if the string is not one of the acceptable values.
'''
# See the IDMapping service if character classes are needed.
# Maybe package this stuff
if not boolstring or not boolstring.strip():
if optional:
return None
raise MissingParameterError(name)
boolstring = boolstring.strip().lower()
if boolstring in ('true', 'yes', 'y', '1'):
return True
elif boolstring in ('false', 'no', 'n', '0'):
return False
else:
raise IllegalParameterError('{} is an invalid value {}'.format(name, boolstring))

def check_timestamp(timestamp: datetime.datetime, name: str):
'''
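The parsing rules of `check_bool` can be sketched standalone; `parse_bool` below is an illustrative reimplementation for this transcript, not the module's actual function, and it raises a plain `ValueError` instead of the service's error classes:

```python
from typing import Optional

def parse_bool(boolstring: Optional[str]) -> Optional[bool]:
    # None or whitespace-only input is treated as "not provided".
    if not boolstring or not boolstring.strip():
        return None
    s = boolstring.strip().lower()
    if s in ('true', 'yes', 'y', '1'):
        return True
    if s in ('false', 'no', 'n', '0'):
        return False
    raise ValueError(f'invalid boolean value: {boolstring}')
```

This mirrors the accepted true/false spellings and the whitespace handling shown in the diff above.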
132 changes: 119 additions & 13 deletions lib/SampleService/core/config.py
@@ -8,22 +8,25 @@
import importlib
from typing import Dict, Optional, List, Tuple
from typing import cast as _cast
import urllib as _urllib
import urllib.request as _request
from urllib.error import URLError as _URLError
import yaml as _yaml
from yaml.parser import ParserError as _ParserError
from jsonschema import validate as _validate
import arango as _arango
from github import Github as _Github

from SampleService.core.validator.metadata_validator import MetadataValidatorSet
from SampleService.core.validator.metadata_validator import MetadataValidator as _MetadataValidator
from SampleService.core.samples import Samples
from SampleService.core.storage.arango_sample_storage import ArangoSampleStorage \
as _ArangoSampleStorage
from SampleService.core.arg_checkers import check_string as _check_string
from SampleService.core.arg_checkers import check_string as _check_string, \
check_bool as _check_bool
from SampleService.core.notification import KafkaNotifier as _KafkaNotifer
from SampleService.core.user_lookup import KBaseUserLookup
from SampleService.core.workspace import WS as _WS
from SampleService.core.errors import MissingParameterError as _MissingParameterError

from installed_clients.WorkspaceClient import Workspace as _Workspace

@@ -76,10 +79,33 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
if kafka_servers: # have to start the server twice to test no kafka scenario
kafka_topic = _check_string(config.get('kafka-topic'), 'config param kafka-topic')

metaval_repo = _check_string(config.get('metadata-validator-config-repo'),
'config param metadata-validator-config-repo',
optional=True)

metaval_filename = _check_string(config.get('metadata-validator-config-filename'),
'config param metadata-validator-config-filename',
optional=True)

if metaval_repo and not metaval_filename:
raise _MissingParameterError('config param metadata-validator-config-filename')

metaval_release_tag = _check_string(config.get('metadata-validator-config-release-tag'),
'config param metadata-validator-config-release-tag',
optional=True)

metaval_prelease_ok = _check_bool(config.get('metadata-validator-config-prerelease'),
'config param metadata-validator-config-prerelease',
optional=True) or False

metaval_url = _check_string(config.get('metadata-validator-config-url'),
'config param metadata-validator-config-url',
optional=True)

github_token = _check_string(config.get('github-token'),
'config param github-token',
optional=True)

# meta params may have info that shouldn't be logged so don't log any for now.
# Add code to deal with this later if needed
print(f'''
@@ -105,11 +131,26 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
workspace-read-admin-token: [REDACTED FOR YOUR ULTIMATE PLEASURE]
kafka-bootstrap-servers: {kafka_servers}
kafka-topic: {kafka_topic}
metadata-validators-config-url: {metaval_url}
metadata-validator-config-repo: {metaval_repo}
metadata-validator-config-filename: {metaval_filename}
metadata-validator-config-prerelease: {metaval_prelease_ok}
metadata-validator-config-release-tag: {metaval_release_tag}
metadata-validator-config-url: {metaval_url}
github-token: [REDACTED]
''')

# build the validators before trying to connect to arango
metaval = get_validators(metaval_url) if metaval_url else MetadataValidatorSet()
if (metaval_url or metaval_repo):
metaval = get_validators(
repo_path=metaval_repo,
repo_asset=metaval_filename,
tag=metaval_release_tag,
prerelease_ok=metaval_prelease_ok,
url=metaval_url,
token=github_token
)
else:
metaval = MetadataValidatorSet()

arangoclient = _arango.ArangoClient(hosts=arango_url)
arango_db = arangoclient.db(
@@ -198,24 +239,89 @@ def _check_string_req(s: Optional[str], name: str) -> str:
}


def get_validators(url: str) -> MetadataValidatorSet:
def get_validators(
repo_path: Optional[str] = None,
repo_asset: Optional[str] = None,
tag: Optional[str] = None,
prerelease_ok: bool = False,
url: Optional[str] = None,
token: Optional[str] = None
) -> MetadataValidatorSet:
'''
Given a url pointing to a config file, initialize any metadata validators present
in the configuration.
Given a GitHub repo or URL pointing to a config file, initialize any metadata
validators present in the configuration. repo_path or url must be specified.
If a GitHub repo is specified, pull the config from the release asset repo_asset.

:param repo_path: GitHub repo path (e.g. kbase/sample_service_validator_config)
:param repo_asset: release asset filename containing the validator config
:param tag: tag specifying a specific release to load the config from (default is the latest release)
:param prerelease_ok: if False, ignore pre-releases when pulling validators from GitHub
:param url: the URL of a config file for the metadata validators
:param token: the token to be used when querying GitHub
:returns: A set of metadata validators.
'''
# TODO VALIDATOR make validator CLI

try:
with _urllib.request.urlopen(url) as res:
cfg = _yaml.safe_load(res)
config_asset = None
if url:
config_url = url
elif not repo_path:
raise ValueError('No metadata validator config URL or repo path.')
else:
if not repo_asset:
raise ValueError(f'No repo_asset name provided for repo "{repo_path}"')
try:
repo = _Github(login_or_token=token).get_repo(repo_path)
releases = [rel for rel in repo.get_releases() if prerelease_ok or not rel.prerelease]
except Exception as e:
raise RuntimeError(f'Fetching releases from repo "{repo_path}" failed.') from e

if not releases:
raise ValueError(f'No releases found in validator config repo "{repo_path}"')

if tag:
target_release = next((r for r in releases if r.tag_name == tag), None)
if not target_release:
raise ValueError(f'No release with tag "{tag}" found in validator config repo "{repo_path}"')
else:
# Use the latest release
target_release = releases[0]
[Review thread]
Member: I don't seem to see a test for the no-provided-tag case.
Collaborator Author: test_config_get_validators_github_bad_tag covers a bad tag. Multiple tests don't provide an explicit tag.
Member: Hmm, I'm not seeing those. I only see one happy-path test and it uses a tag.

assets = target_release.get_assets()
if not assets:
[Review thread]
Member: There doesn't seem to be a test for this case.
Collaborator Author: What is "this"?
Collaborator Author: If you mean the no-assets case: it's not currently replicable with GH, but the API spec doesn't specifically disallow an empty array here, so this is a "just in case" error that would require mocking GH responses to test, which I think would be wild overkill.
Member (MrCreosote, Aug 29, 2022): If it's practically impossible for this case to occur, I would just take the code path out and add a comment that currently GitHub will always have > 0 assets. If things change it'll throw a TypeError, which I think is fine.
raise ValueError(f'No assets found in validator config repo "{repo_path}", '+
f'release tag "{target_release.tag_name}"')

config_asset = next((a for a in assets if a.name == repo_asset), None)
if not config_asset:
raise ValueError(f'No config asset "{repo_asset}" found in validator config '+
f'repo "{repo_path}", release tag "{target_release.tag_name}"')

config_url = config_asset.url

req = _request.Request(config_url)
req.add_header('Accept', 'application/octet-stream')
if token:
req.add_header('Authorization', f'token {token}')
with _request.urlopen(req) as response:
cfg = _yaml.safe_load(response)

except _URLError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e.reason)}') from e
if config_asset:
raise ValueError(f'Error downloading config asset from repo "{repo_path}" '+
[Review thread]
Member: There doesn't seem to be a test for this case.
Collaborator Author: Again, it's just differentiated error text for an already-tested error.
Member: Same comment as below re test writing.
f'tag "{tag}" asset "{config_asset.url}": {str(e.reason)}') from e
else:
raise ValueError(f'Error downloading config asset from {url}: {str(e.reason)}') from e
except _ParserError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e)}') from e
if config_asset:
raise ValueError(
f'Failed to open validator configuration file from repo "{repo_path}" '+
[Review thread]
Member: There doesn't seem to be a test for this case.
Collaborator Author: Mentioned, I think, in a previous comment; I'm not of the opinion one is needed, as this is the same (already tested) bad-yaml case with different error text. Covered by test_config_get_validators_fail_bad_yaml.
Member: I guess I don't understand why you wouldn't add a test? It's an uncovered code path and so should get a test for that code path IMO. The tests look pretty simple to write, so naively it seems like a 5-minute thing. I would certainly write one if it was my PR.
Collaborator Author: Mostly because it's a weird amount of overhead to use a third-party service in tests to test a code path that is only differentiated by earlier (tested) behavior. It's also not a functionally different code path; it just changes what string is thrown...
f'tag "{tag}" asset "{config_asset.url}": {str(e)}') from e
else:
raise ValueError(
f'Failed to open validator configuration file from {url}: {str(e)}') from e

_validate(instance=cfg, schema=_META_VAL_JSONSCHEMA)

mvals = _get_validators(
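The release-selection rules implemented above (drop pre-releases unless allowed, then pick by tag or fall back to the newest release) can be sketched as pure logic over plain records. `Release` and `select_release` here are illustrative stand-ins, not PyGithub's actual classes:

```python
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Release:
    tag_name: str
    prerelease: bool

def select_release(releases: List[Release], tag: Optional[str] = None,
                   prerelease_ok: bool = False) -> Release:
    # Drop pre-releases unless they were explicitly allowed.
    candidates = [r for r in releases if prerelease_ok or not r.prerelease]
    if not candidates:
        raise ValueError('No releases found')
    if tag:
        target = next((r for r in candidates if r.tag_name == tag), None)
        if not target:
            raise ValueError(f'No release with tag "{tag}" found')
        return target
    # The GitHub API returns releases newest-first, so the first
    # remaining candidate is the latest eligible release.
    return candidates[0]
```

The same fall-through (explicit tag wins, otherwise latest) is what the PR's `get_validators` performs against live PyGithub release objects.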
4 changes: 2 additions & 2 deletions test/SampleService_test.py
@@ -47,7 +47,7 @@
# TODO should really test a start up for the case where the metadata validation config is not
# supplied, but that's almost never going to be the case and the code is trivial, so YAGNI

VER = '0.2.5'
VER = '0.2.6'

_AUTH_DB = 'test_auth_db'
_WS_DB = 'test_ws_db'
@@ -418,7 +418,7 @@ def test_init_fail():
# get_validators is tested elsewhere, just make sure it'll error out
cfg['metadata-validator-config-url'] = 'https://kbase.us/services'
init_fail(cfg, ValueError(
'Failed to open validator configuration file at https://kbase.us/services: Not Found'))
'Error downloading config asset from https://kbase.us/services: Not Found'))


def init_fail(config, expected):
32 changes: 31 additions & 1 deletion test/core/arg_checkers_test.py
@@ -1,7 +1,8 @@
import datetime
from pytest import raises
from core.test_utils import assert_exception_correct
from SampleService.core.arg_checkers import check_string, not_falsy, not_falsy_in_iterable
from SampleService.core.arg_checkers import \
check_string, check_bool, not_falsy, not_falsy_in_iterable
from SampleService.core.arg_checkers import check_timestamp
from SampleService.core.errors import MissingParameterError, IllegalParameterError

@@ -104,6 +105,35 @@ def test_check_string_long_fail():
assert_exception_correct(
got.value, IllegalParameterError(f'var name exceeds maximum length of {length}'))

def test_check_bool():
test_items = {
'true': True,
'Yes ': True,
' Y': True,
' 1': True,
' FALSE ': False,
'no ': False,
' n': False,
'0 ': False,
None: None,
' ': None,
}
for string, expected in test_items.items():
assert check_bool(string, 'name', optional=True) == expected

def test_check_bool_fail_nonoptional():
with raises(Exception) as got:
check_bool(None, 'name')
assert_exception_correct(got.value, MissingParameterError('name'))

def test_check_bool_fail_bad_val():
with raises(Exception) as got:
check_bool('maybe', 'name')
assert_exception_correct(got.value,
IllegalParameterError('{} is an invalid value {}'.format('name', 'maybe'))
)



def _dt(timestamp):
return datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)