Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCT-3141 Add support for config repo GitHub release workflow #408

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ typing-extensions = "==4.2.0"
types-pyyaml = "==6.0.7"
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
types-requests = "==2.27.20"
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
shell = "==1.0.1"
pygithub = "==1.55"

[requires]
python_version = "3.7"
316 changes: 252 additions & 64 deletions Pipfile.lock

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions deploy.cfg.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,19 @@ data-link-collection = {{ default .Env.data_link_collection "samples_data_link"
workspace-object-version-shadow-collection = {{ default .Env.workspace_object_version_shadow_collection "ws_object_version" }}
schema-collection = {{ default .Env.schema_collection "samples_schema" }}

# A URL pointing to a configuration file for any metadata validators to be installed on startup.
# See the readme file for a description of the file contents.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "https://raw.githubusercontent.com/kbase/sample_service_validator_config/master/metadata_validation.yml" }}
# A relative github path pointing to a configuration repo for any metadata validators to be installed on startup.
# In the form of a github repo path, e.g. `kbase/sample_service_validator_config`
# See the readme file for a description of the repo contents.
metadata-validator-config-repo = {{ default .Env.metadata_validator_config_repo "kbase/sample_service_validator_config" }}
# Release assest filename to use for the metadata validator config
metadata-validator-config-filename = {{ default .Env.metadata_validator_config_filename "metadata_validation.yml" }}
# Whether or not to consider prereleases when looking for the metadata validator config release asset.
dauglyon marked this conversation as resolved.
Show resolved Hide resolved
metadata-validator-config-prerelease = {{ default .Env.metadata_validator_config_prerelease "0" }}
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
# Overrides the default metadata validator config repo and provides a direct download URL to the metadata validator config.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "" }}
# Provides a github token for authorizing validation downloads.
# Improves rate limiting dramatically, but optional for public config repos
github-token = {{ default .Env.github_token "" }}
dauglyon marked this conversation as resolved.
Show resolved Hide resolved

# Parameters for Kafka notifications.
#
Expand Down
71 changes: 63 additions & 8 deletions lib/SampleService/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
import importlib
from typing import Dict, Optional, List, Tuple
from typing import cast as _cast
import urllib as _urllib
import urllib.request as request
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
from urllib.error import URLError as _URLError
import yaml as _yaml
from yaml.parser import ParserError as _ParserError
from jsonschema import validate as _validate
import arango as _arango
from github import Github as _Github

from SampleService.core.validator.metadata_validator import MetadataValidatorSet
from SampleService.core.validator.metadata_validator import MetadataValidator as _MetadataValidator
Expand Down Expand Up @@ -76,10 +77,26 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
if kafka_servers: # have to start the server twice to test no kafka scenario
kafka_topic = _check_string(config.get('kafka-topic'), 'config param kafka-topic')

metaval_repo = _check_string(config.get('metadata-validator-config-repo'),
'config param metadata-validator-config-repo',
optional=True)

metaval_filename = _check_string(config.get('metadata-validator-config-filename'),
'config param metadata-validator-config-filename',
optional=True)
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

metaval_prelease_ok = _check_string(config.get('metadata-validator-config-prerelease'),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'config param metadata-validator-config-prerelease',
optional=True)

metaval_url = _check_string(config.get('metadata-validator-config-url'),
'config param metadata-validator-config-url',
optional=True)

github_token = _check_string(config.get('github-token'),
'config param github-token',
optional=True)

# meta params may have info that shouldn't be logged so don't log any for now.
# Add code to deal with this later if needed
print(f'''
Expand All @@ -105,11 +122,18 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
workspace-read-admin-token: [REDACTED FOR YOUR ULTIMATE PLEASURE]
kafka-bootstrap-servers: {kafka_servers}
kafka-topic: {kafka_topic}
metadata-validators-config-repo: {metaval_repo}
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
metadata-validators-config-url: {metaval_url}
''')

# build the validators before trying to connect to arango
metaval = get_validators(metaval_url) if metaval_url else MetadataValidatorSet()
metaval = get_validators(
repo_path=metaval_repo,
repo_file=metaval_filename,
prerelease_ok= (metaval_prelease_ok or '').lower() in ('true', 'yes', 'y', '1'),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
url=(metaval_url or None),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
token=(github_token or None)) if (
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
metaval_url or metaval_repo) else MetadataValidatorSet()
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

arangoclient = _arango.ArangoClient(hosts=arango_url)
arango_db = arangoclient.db(
Expand Down Expand Up @@ -198,7 +222,7 @@ def _check_string_req(s: Optional[str], name: str) -> str:
}


def get_validators(url: str) -> MetadataValidatorSet:
def get_validators(repo_path: Optional[str] = None, repo_file: Optional[str] = None, prerelease_ok: Optional[bool] = False, url: Optional[str] = None, token: Optional[str] = None) -> MetadataValidatorSet:
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'''
Given a url pointing to a config file, initialize any metadata validators present
in the configuration.
Expand All @@ -207,15 +231,46 @@ def get_validators(url: str) -> MetadataValidatorSet:
:returns: A set of metadata validators.
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'''
# TODO VALIDATOR make validator CLI

try:
with _urllib.request.urlopen(url) as res:
cfg = _yaml.safe_load(res)
config_asset = None
if url:
config_url = url
elif not repo_path:
raise ValueError(f'No metadata validator config URL or repo path.')
else:
try:
repo = _Github(login_or_token=token).get_repo(repo_path)
releases = [rel for rel in repo.get_releases() if prerelease_ok or not rel.prerelease]
except:
raise RuntimeError(f'Fetching releases from repo {repo_path} failed.')
dauglyon marked this conversation as resolved.
Show resolved Hide resolved
if not releases:
raise ValueError(f'No releases found in validator config repo {repo_path}')
latest_release = releases[0] # max(releases, key=lambda rel: rel.created_at)
assets = latest_release.get_assets()
if not assets:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There doesn't seem to be a test for this case

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is "this"?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you mean the no-assets case: it's not currently replicable with GH, but the API spec doesn't specifically disallow an empty array here, so this is a "just in case" error that would require mocking GH responses to test, which I think would be wild overkill

Copy link
Member

@MrCreosote MrCreosote Aug 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's practically impossible for this case to occur, I would just take the code path out and add a comment that currently GitHub will always have > 0 assets. If things change it'll throw a TypeError, which I think is fine

raise ValueError(f'No assets found in validator config repo {repo_path}')
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
config_asset = next((a for a in assets if a.name==repo_file), None)
if not config_asset:
raise ValueError(f'No config asset found in validator config repo {repo_path}')
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
config_url = config_asset.url

req = request.Request(config_url)
req.add_header('Accept', 'application/octet-stream')
if token:
req.add_header('Authorization', f'token {token}')
with request.urlopen(req) as response:
cfg = _yaml.safe_load(response)

except _URLError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e.reason)}') from e
if config_asset:
raise ValueError(f'Error downloading config asset from {config_asset.url}: {str(e.reason)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
else:
raise ValueError(f'Error downloading config asset from {url or repo_path}: {str(e.reason)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
except _ParserError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e)}') from e
f'Failed to open validator configuration file from {url or repo_path}: {str(e)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

_validate(instance=cfg, schema=_META_VAL_JSONSCHEMA)

mvals = _get_validators(
Expand Down
2 changes: 1 addition & 1 deletion test/SampleService_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def test_init_fail():
# get_validators is tested elsewhere, just make sure it'll error out
cfg['metadata-validator-config-url'] = 'https://kbase.us/services'
init_fail(cfg, ValueError(
'Failed to open validator configuration file at https://kbase.us/services: Not Found'))
'Error downloading config asset from https://kbase.us/services: Not Found'))


def init_fail(config, expected):
Expand Down
14 changes: 7 additions & 7 deletions test/core/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_config_get_validators(temp_dir):
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no tests for the new validation download

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to be tricky as I'll need to mock the github api, but I can give it a go

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually I want self-contained tests, but for this case my initial thought would be to just set up a github repo to test against. Mocking all that stuff seems like too much work for too little gain over just relying on an external resource. Is there a reason I'm not seeing that that's not doable or a bad idea?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... AND ANOTHER THING... mocking external dependencies is an anti-pattern anyway since if the external APIs change your tests will continue to pass but your actual deployments will fail

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a test for a few cases:

  • No url or repo supplied
  • the no releases case
  • the no assets case
  • parser errors

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately I have no privs for https://github.com/kbasetest

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually the "no assets" case seems to never happen at the moment on GitHub as all releases include an asset file of the zipped source.

I'd ditch that check and add a comment mentioning that in that case

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately I have no privs

Invited

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added tests for check_bool, no releases, no URL or repo_path, and repo_path but no repo_asset. Unfortunately I still have no idea why coverage isn't running on this PR, but I don't see it on many other PRs in the repo...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All I can say is that it ran on every PR I ever did from very early on, I think it was one of the first things I set up

}
tf = _write_validator_config(cfg, temp_dir)
vals = get_validators('file://' + tf)
vals = get_validators(url='file://' + tf)
assert len(vals.keys()) == 3
assert len(vals.prefix_keys()) == 3
# the test validators always fail
Expand Down Expand Up @@ -148,7 +148,7 @@ def test_config_get_validators(temp_dir):
# noop entry
cfg = {}
tf = _write_validator_config(cfg, temp_dir)
vals = get_validators('file://' + tf)
vals = get_validators(url='file://' + tf)
assert len(vals.keys()) == 0
assert len(vals.prefix_keys()) == 0

Expand All @@ -157,9 +157,9 @@ def test_config_get_validators_fail_bad_file(temp_dir):
tf = _write_validator_config({}, temp_dir)
os.remove(tf)
with raises(Exception) as got:
get_validators('file://' + tf)
get_validators(url='file://' + tf)
assert_exception_correct(got.value, ValueError(
f"Failed to open validator configuration file at file://{tf}: " +
f"Error downloading config asset from file://{tf}: " +
f"[Errno 2] No such file or directory: '{tf}'"))


Expand All @@ -170,9 +170,9 @@ def test_config_get_validators_fail_bad_yaml(temp_dir):
with open(tf[1], 'w') as temp:
temp.write('[bad yaml')
with raises(Exception) as got:
get_validators('file://' + tf[1])
get_validators(url='file://' + tf[1])
assert_exception_correct(got.value, ValueError(
f'Failed to open validator configuration file at file://{tf[1]}: while parsing a ' +
f'Failed to open validator configuration file from file://{tf[1]}: while parsing a ' +
'flow sequence\n in "<urllib response>", line 1, column 1\nexpected \',\' or \']\', ' +
'but got \'<stream end>\'\n in "<urllib response>", line 1, column 10'
))
Expand Down Expand Up @@ -295,5 +295,5 @@ def test_config_get_prefix_validators_fail_function_exception(temp_dir):
def _config_get_validators_fail(cfg, temp_dir, expected):
tf = _write_validator_config(cfg, temp_dir)
with raises(Exception) as got:
get_validators('file://' + tf)
get_validators(url='file://' + tf)
assert_exception_correct(got.value, expected)