Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCT-3141 Add support for config repo GitHub release workflow #408

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ typing-extensions = "==4.2.0"
types-pyyaml = "==6.0.7"
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
types-requests = "==2.27.20"
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
shell = "==1.0.1"
pygithub = "==1.55"

[requires]
python_version = "3.7"
316 changes: 252 additions & 64 deletions Pipfile.lock

Large diffs are not rendered by default.

16 changes: 13 additions & 3 deletions deploy.cfg.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,19 @@ data-link-collection = {{ default .Env.data_link_collection "samples_data_link"
workspace-object-version-shadow-collection = {{ default .Env.workspace_object_version_shadow_collection "ws_object_version" }}
schema-collection = {{ default .Env.schema_collection "samples_schema" }}

# A URL pointing to a configuration file for any metadata validators to be installed on startup.
# See the readme file for a description of the file contents.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "https://raw.githubusercontent.com/kbase/sample_service_validator_config/master/metadata_validation.yml" }}
# A relative github path pointing to a configuration repo for any metadata validators to be installed on startup.
# In the form of a github repo path, e.g. `kbase/sample_service_validator_config`
# See the readme file for a description of the repo contents.
metadata-validator-config-repo = {{ default .Env.metadata_validator_config_repo "kbase/sample_service_validator_config" }}
# Release assest filename to use for the metadata validator config
metadata-validator-config-filename = {{ default .Env.metadata_validator_config_filename "metadata_validation.yml" }}
# Whether or not to consider prereleases when looking for the metadata validator config release asset.
dauglyon marked this conversation as resolved.
Show resolved Hide resolved
metadata-validator-config-prerelease = {{ default .Env.metadata_validator_config_prerelease "0" }}
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
# Overrides the default metadata validator config repo and provides a direct download URL to the metadata validator config.
metadata-validator-config-url = {{ default .Env.metadata_validator_config_url "" }}
# Provides a github token for authorizing validation downloads.
# Improves rate limiting dramatically, but optional for public config repos
github-token = {{ default .Env.github_token "" }}
dauglyon marked this conversation as resolved.
Show resolved Hide resolved

# Parameters for Kafka notifications.
#
Expand Down
71 changes: 63 additions & 8 deletions lib/SampleService/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
import importlib
from typing import Dict, Optional, List, Tuple
from typing import cast as _cast
import urllib as _urllib
import urllib.request as request
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
from urllib.error import URLError as _URLError
import yaml as _yaml
from yaml.parser import ParserError as _ParserError
from jsonschema import validate as _validate
import arango as _arango
from github import Github as _Github

from SampleService.core.validator.metadata_validator import MetadataValidatorSet
from SampleService.core.validator.metadata_validator import MetadataValidator as _MetadataValidator
Expand Down Expand Up @@ -76,10 +77,26 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
if kafka_servers: # have to start the server twice to test no kafka scenario
kafka_topic = _check_string(config.get('kafka-topic'), 'config param kafka-topic')

metaval_repo = _check_string(config.get('metadata-validator-config-repo'),
'config param metadata-validator-config-repo',
optional=True)

metaval_filename = _check_string(config.get('metadata-validator-config-filename'),
'config param metadata-validator-config-filename',
optional=True)
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

metaval_prelease_ok = _check_string(config.get('metadata-validator-config-prerelease'),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'config param metadata-validator-config-prerelease',
optional=True)

metaval_url = _check_string(config.get('metadata-validator-config-url'),
'config param metadata-validator-config-url',
optional=True)

github_token = _check_string(config.get('github-token'),
'config param github-token',
optional=True)

# meta params may have info that shouldn't be logged so don't log any for now.
# Add code to deal with this later if needed
print(f'''
Expand All @@ -105,11 +122,18 @@ def build_samples(config: Dict[str, str]) -> Tuple[Samples, KBaseUserLookup, Lis
workspace-read-admin-token: [REDACTED FOR YOUR ULTIMATE PLEASURE]
kafka-bootstrap-servers: {kafka_servers}
kafka-topic: {kafka_topic}
metadata-validators-config-repo: {metaval_repo}
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
metadata-validators-config-url: {metaval_url}
''')

# build the validators before trying to connect to arango
metaval = get_validators(metaval_url) if metaval_url else MetadataValidatorSet()
metaval = get_validators(
repo_path=metaval_repo,
repo_file=metaval_filename,
prerelease_ok= (metaval_prelease_ok or '').lower() in ('true', 'yes', 'y', '1'),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
url=(metaval_url or None),
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
token=(github_token or None)) if (
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
metaval_url or metaval_repo) else MetadataValidatorSet()
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

arangoclient = _arango.ArangoClient(hosts=arango_url)
arango_db = arangoclient.db(
Expand Down Expand Up @@ -198,7 +222,7 @@ def _check_string_req(s: Optional[str], name: str) -> str:
}


def get_validators(url: str) -> MetadataValidatorSet:
def get_validators(repo_path: Optional[str] = None, repo_file: Optional[str] = None, prerelease_ok: Optional[bool] = False, url: Optional[str] = None, token: Optional[str] = None) -> MetadataValidatorSet:
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'''
Given a url pointing to a config file, initialize any metadata validators present
in the configuration.
Expand All @@ -207,15 +231,46 @@ def get_validators(url: str) -> MetadataValidatorSet:
:returns: A set of metadata validators.
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
'''
# TODO VALIDATOR make validator CLI

try:
with _urllib.request.urlopen(url) as res:
cfg = _yaml.safe_load(res)
config_asset = None
if url:
config_url = url
elif not repo_path:
raise ValueError(f'No metadata validator config URL or repo path.')
else:
try:
repo = _Github(login_or_token=token).get_repo(repo_path)
releases = [rel for rel in repo.get_releases() if prerelease_ok or not rel.prerelease]
except:
raise RuntimeError(f'Fetching releases from repo {repo_path} failed.')
dauglyon marked this conversation as resolved.
Show resolved Hide resolved
if not releases:
raise ValueError(f'No releases found in validator config repo {repo_path}')
latest_release = releases[0] # max(releases, key=lambda rel: rel.created_at)
assets = latest_release.get_assets()
if not assets:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There doesn't seem to be a test for this case

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is "this"?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you mean the no-assets case: it's not currently replicable with GH, but the API spec doesn't specifically disallow an empty array here, so this is a "just in case" error that would require mocking GH responses to test, which I think would be wild overkill

Copy link
Member

@MrCreosote MrCreosote Aug 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's practically impossible for this case to occur, I would just take the code path out and add a comment that currently GitHub will always have > 0 assets. If things change it'll throw a TypeError, which I think is fine

raise ValueError(f'No assets found in validator config repo {repo_path}')
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
config_asset = next((a for a in assets if a.name==repo_file), None)
if not config_asset:
raise ValueError(f'No config asset found in validator config repo {repo_path}')
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
config_url = config_asset.url

req = request.Request(config_url)
req.add_header('Accept', 'application/octet-stream')
if token:
req.add_header('Authorization', f'token {token}')
with request.urlopen(req) as response:
cfg = _yaml.safe_load(response)

except _URLError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e.reason)}') from e
if config_asset:
raise ValueError(f'Error downloading config asset from {config_asset.url}: {str(e.reason)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
else:
raise ValueError(f'Error downloading config asset from {url or repo_path}: {str(e.reason)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved
except _ParserError as e:
raise ValueError(
f'Failed to open validator configuration file at {url}: {str(e)}') from e
f'Failed to open validator configuration file from {url or repo_path}: {str(e)}') from e
MrCreosote marked this conversation as resolved.
Show resolved Hide resolved

_validate(instance=cfg, schema=_META_VAL_JSONSCHEMA)

mvals = _get_validators(
Expand Down
2 changes: 1 addition & 1 deletion test/SampleService_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def test_init_fail():
# get_validators is tested elsewhere, just make sure it'll error out
cfg['metadata-validator-config-url'] = 'https://kbase.us/services'
init_fail(cfg, ValueError(
'Failed to open validator configuration file at https://kbase.us/services: Not Found'))
'Error downloading config asset from https://kbase.us/services: Not Found'))


def init_fail(config, expected):
Expand Down
14 changes: 7 additions & 7 deletions test/core/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_config_get_validators(temp_dir):
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's no tests for the new validation download

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to be tricky as I'll need to mock the github api, but I can give it a go

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually I want self-contained tests, but for this case my initial thought would be to just set up a github repo to test against. Mocking all that stuff seems like too much work for too little gain over just relying on an external resource. Is there a reason I'm not seeing that that's not doable or a bad idea?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

... AND ANOTHER THING... mocking external dependencies is an anti-pattern anyway since if the external APIs change your tests will continue to pass but your actual deployments will fail

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see a test for a few cases:

  • No url or repo supplied
  • the no releases case
  • the no assets case
  • parser errors

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately I have no privs for https://github.com/kbasetest

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually the "no assets" case seems to never happen at the moment on GitHub as all releases include an asset file of the zipped source.

I'd ditch that check and add a comment mentioning that in that case

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately I have no privs

Invited

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added tests for check_bool, no releases, no URL or repo_path, and repo_path but no repo_asset. Unfortunately I still have no idea why coverage isn't running on this PR, but I don't see it on many other PRs in the repo...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All I can say is that it ran on every PR I ever did from very early on, I think it was one of the first things I set up

}
tf = _write_validator_config(cfg, temp_dir)
vals = get_validators('file://' + tf)
vals = get_validators(url='file://' + tf)
assert len(vals.keys()) == 3
assert len(vals.prefix_keys()) == 3
# the test validators always fail
Expand Down Expand Up @@ -148,7 +148,7 @@ def test_config_get_validators(temp_dir):
# noop entry
cfg = {}
tf = _write_validator_config(cfg, temp_dir)
vals = get_validators('file://' + tf)
vals = get_validators(url='file://' + tf)
assert len(vals.keys()) == 0
assert len(vals.prefix_keys()) == 0

Expand All @@ -157,9 +157,9 @@ def test_config_get_validators_fail_bad_file(temp_dir):
tf = _write_validator_config({}, temp_dir)
os.remove(tf)
with raises(Exception) as got:
get_validators('file://' + tf)
get_validators(url='file://' + tf)
assert_exception_correct(got.value, ValueError(
f"Failed to open validator configuration file at file://{tf}: " +
f"Error downloading config asset from file://{tf}: " +
f"[Errno 2] No such file or directory: '{tf}'"))


Expand All @@ -170,9 +170,9 @@ def test_config_get_validators_fail_bad_yaml(temp_dir):
with open(tf[1], 'w') as temp:
temp.write('[bad yaml')
with raises(Exception) as got:
get_validators('file://' + tf[1])
get_validators(url='file://' + tf[1])
assert_exception_correct(got.value, ValueError(
f'Failed to open validator configuration file at file://{tf[1]}: while parsing a ' +
f'Failed to open validator configuration file from file://{tf[1]}: while parsing a ' +
'flow sequence\n in "<urllib response>", line 1, column 1\nexpected \',\' or \']\', ' +
'but got \'<stream end>\'\n in "<urllib response>", line 1, column 10'
))
Expand Down Expand Up @@ -295,5 +295,5 @@ def test_config_get_prefix_validators_fail_function_exception(temp_dir):
def _config_get_validators_fail(cfg, temp_dir, expected):
tf = _write_validator_config(cfg, temp_dir)
with raises(Exception) as got:
get_validators('file://' + tf)
get_validators(url='file://' + tf)
assert_exception_correct(got.value, expected)