From 52cc963dc24ff74764ae7403cf5c8c92830da320 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 2 Aug 2022 19:04:49 +0200 Subject: [PATCH 1/2] Add CLI validation of JSON-LD Closes #490 --- src/bioregistry/cli.py | 2 + src/bioregistry/validate/__init__.py | 1 + src/bioregistry/validate/cli.py | 61 ++++++++++++++++++++++++++++ src/bioregistry/validate/utils.py | 37 +++++++++++++++++ tests/test_validation.py | 32 +++++++++++++++ 5 files changed, 133 insertions(+) create mode 100644 src/bioregistry/validate/__init__.py create mode 100644 src/bioregistry/validate/cli.py create mode 100644 src/bioregistry/validate/utils.py create mode 100644 tests/test_validation.py diff --git a/src/bioregistry/cli.py b/src/bioregistry/cli.py index 0aeb61044..e5c5a2832 100644 --- a/src/bioregistry/cli.py +++ b/src/bioregistry/cli.py @@ -11,6 +11,7 @@ from .export.cli import export from .lint import lint from .utils import get_hexdigests, secho +from .validate.cli import validate from .version import VERSION @@ -74,6 +75,7 @@ def align(skip_fairsharing: bool): main.add_command(lint) main.add_command(compare) main.add_command(export) +main.add_command(validate) main.add_command(make_web_command("bioregistry.app.wsgi:app")) diff --git a/src/bioregistry/validate/__init__.py b/src/bioregistry/validate/__init__.py new file mode 100644 index 000000000..99871a3fd --- /dev/null +++ b/src/bioregistry/validate/__init__.py @@ -0,0 +1 @@ +"""Validation utilities.""" diff --git a/src/bioregistry/validate/cli.py b/src/bioregistry/validate/cli.py new file mode 100644 index 000000000..c92fed2fa --- /dev/null +++ b/src/bioregistry/validate/cli.py @@ -0,0 +1,61 @@ +"""Validation command line interface. + +JSON-LD Validation + +1. Passes ``bioregistry validate jsonld "https://bioregistry.io/api/collection/0000002?format=context"`` +1. Fails ``bioregistry validate jsonld "https://raw.githubusercontent.com/prefixcommons/prefixcommons-py/master/prefixcommons/registry/go_context.jsonld"`` +2. 
+"""
+
+import json
+import sys
+from pathlib import Path
+
+import click
+import requests
+
+from .utils import validate_jsonld
+
+__all__ = [
+    "validate",
+]
+
+
+@click.group()
+def validate():
+    """Validate data with the Bioregistry."""
+
+
+@validate.command()
+@click.argument("location")
+@click.option("--relax", is_flag=True)
+def jsonld(location: str, relax: bool):
+    """Validate the prefixes in a JSON-LD file given as a URL or a local path."""
+    if location.startswith(("http://", "https://")):
+        res = requests.get(location, timeout=30)  # never hang on a dead host
+        res.raise_for_status()
+        obj = res.json()
+    else:
+        path = Path(location).resolve()
+        if not path.is_file():
+            raise ValueError(f"not a file: {path}")
+        obj = json.loads(path.read_text(encoding="utf-8"))
+
+    messages = validate_jsonld(obj, strict=not relax)
+    for message in messages:
+        error, prefix, solution, level = message["error"], message["prefix"], message["solution"], message['level']
+        click.secho(f"{prefix} - {error}", fg=LEVEL_TO_COLOR[level], nl=False)
+        if solution:
+            click.echo(" > " + solution)
+        else:
+            click.echo("")
+
+    if any(message["level"] == "error" for message in messages):
+        click.secho("failed", fg="red")
+        sys.exit(1)
+
+
+LEVEL_TO_COLOR = {
+    "warning": "yellow",
+    "error": "red",
+}
diff --git a/src/bioregistry/validate/utils.py b/src/bioregistry/validate/utils.py
new file mode 100644
index 000000000..21686425a
--- /dev/null
+++ b/src/bioregistry/validate/utils.py
@@ -0,0 +1,48 @@
+"""Validation utilities."""
+
+from typing import Mapping
+
+import bioregistry
+
+__all__ = [
+    "validate_jsonld",
+]
+
+
+def validate_jsonld(obj: Mapping[str, Mapping[str, str]], strict: bool = True):
+    """Validate the prefixes in the ``@context`` of a JSON-LD document.
+
+    :param obj: A JSON-LD document as a dictionary whose ``@context`` entry
+        is a dictionary keyed by prefix.
+    :param strict: If true, report non-standard prefixes at the ``error`` level,
+        otherwise report them at the ``warning`` level.
+    :returns: A list of dictionaries with the keys ``prefix``, ``error``,
+        ``solution``, and ``level``, one for each problematic prefix.
+    :raises TypeError: If ``obj`` is not a dictionary, lacks an ``@context``,
+        or has an ``@context`` that is not a dictionary.
+    """
+    if not isinstance(obj, dict):
+        raise TypeError("data is not a dictionary")
+    context = obj.get("@context")
+    if context is None:
+        raise TypeError("data is missing a @context field")
+    if not isinstance(context, dict):
+        raise TypeError(f"@context is not a dictionary: {context}")
+    messages = []
+    for prefix, uri_prefix in context.items():
+        norm_prefix = bioregistry.normalize_prefix(prefix)
+        if norm_prefix is None:
+            messages.append({
+                "prefix": prefix,
+                "error": "invalid",
+                "solution": None,
+                "level": "error",
+            })
+        elif norm_prefix != prefix:
+            messages.append({
+                "prefix": prefix,
+                "error": "nonstandard",
+                "solution": f"Switch to standard prefix: {norm_prefix}",
+                "level": "error" if strict else "warning"
+            })
+    return messages
diff --git a/tests/test_validation.py b/tests/test_validation.py
new file mode 100644
index 000000000..cba549b66
--- /dev/null
+++ b/tests/test_validation.py
@@ -0,0 +1,32 @@
+"""Test for validation utilities."""
+
+import unittest
+
+from bioregistry.validate.utils import validate_jsonld
+
+
+class TestValidation(unittest.TestCase):
+    """Test case for validation utilities."""
+
+    def test_validate_jsonld(self):
+        """Test validating JSON-LD, which returns one message dictionary per bad prefix."""
+        with self.assertRaises(TypeError):
+            validate_jsonld(None)
+        with self.assertRaises(TypeError):
+            validate_jsonld({})
+        with self.assertRaises(TypeError):
+            validate_jsonld({"@context": None})
+
+        test_context = {
+            "@context": {
+                "GO": ...,
+                "nope": ...,
+            }
+        }
+        nonstandard_levels = {True: "error", False: "warning"}
+        for strict, level in nonstandard_levels.items():
+            messages = validate_jsonld(test_context, strict=strict)
+            actual = [(m["prefix"], m["error"], m["level"]) for m in messages]
+            self.assertEqual(
+                [("GO", "nonstandard", level), ("nope", "invalid", "error")], actual
+            )

From 7c5ef385a048de31e6827aa4716d80a8f986f15d Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Tue, 16 Apr 2024 12:49:26 +0200
Subject: [PATCH 2/2] Lint

---
 src/bioregistry/validate/cli.py   |  7 ++++++-
 src/bioregistry/validate/utils.py | 28 ++++++++++++++++------------
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/bioregistry/validate/cli.py b/src/bioregistry/validate/cli.py
index c92fed2fa..9ba80614c 100644
--- a/src/bioregistry/validate/cli.py
+++ b/src/bioregistry/validate/cli.py
@@ -43,7 +43,12 @@ def 
jsonld(location: str, relax: bool): messages = validate_jsonld(obj, strict=not relax) for message in messages: - error, prefix, solution, level = message["error"], message["prefix"], message["solution"], message['level'] + error, prefix, solution, level = ( + message["error"], + message["prefix"], + message["solution"], + message["level"], + ) click.secho(f"{prefix} - {error}", fg=LEVEL_TO_COLOR[level], nl=False) if solution: click.echo(" > " + solution) diff --git a/src/bioregistry/validate/utils.py b/src/bioregistry/validate/utils.py index 21686425a..74434c881 100644 --- a/src/bioregistry/validate/utils.py +++ b/src/bioregistry/validate/utils.py @@ -21,17 +21,21 @@ def validate_jsonld(obj: Mapping[str, Mapping[str, str]], strict: bool = True): for prefix, uri_prefix in context.items(): norm_prefix = bioregistry.normalize_prefix(prefix) if norm_prefix is None: - messages.append({ - "prefix": prefix, - "error": "invalid", - "solution": None, - "level": "error", - }) + messages.append( + { + "prefix": prefix, + "error": "invalid", + "solution": None, + "level": "error", + } + ) elif norm_prefix != prefix: - messages.append({ - "prefix": prefix, - "error": "nonstandard", - "solution": f"Switch to standard prefix: {norm_prefix}", - "level": "error" if strict else "warning" - }) + messages.append( + { + "prefix": prefix, + "error": "nonstandard", + "solution": f"Switch to standard prefix: {norm_prefix}", + "level": "error" if strict else "warning", + } + ) return messages