From 3aa74df562c7568060963608637841025f153370 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Mon, 18 Mar 2024 14:43:04 -0700 Subject: [PATCH 1/4] use os.path.join instead of hardcoding slash --- linkml_runtime/linkml_model/linkml_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linkml_runtime/linkml_model/linkml_files.py b/linkml_runtime/linkml_model/linkml_files.py index 48d11cc6..4fbec839 100644 --- a/linkml_runtime/linkml_model/linkml_files.py +++ b/linkml_runtime/linkml_model/linkml_files.py @@ -67,7 +67,7 @@ class _Path(Enum): RDF = "rdf" SHEXC = "shex" SHEXJ = "shex" - YAML = "model/schema" + YAML = os.path.join("model", "schema") class ReleaseTag(_AutoName): @@ -80,7 +80,7 @@ class ReleaseTag(_AutoName): def _build_path(source: Source, fmt: Format) -> str: """ Create the relative path for source and fmt """ - return f"{_Path[fmt.name].value}/{source.value}.{fmt.value}" + return os.path.join(_Path[fmt.name].value, f"{source.value}.{fmt.value}") def _build_loc(base: str, source: Source, fmt: Format) -> str: From fc72e337b0499b58f1b0217faefb066768709fa5 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 20 Mar 2024 18:22:01 -0700 Subject: [PATCH 2/4] just redo most of it --- linkml_runtime/linkml_model/linkml_files.py | 161 ++++++++++++------- tests/test_linkml_model/__init__.py | 0 tests/test_linkml_model/test_linkml_files.py | 125 ++++++++++++++ 3 files changed, 230 insertions(+), 56 deletions(-) create mode 100644 tests/test_linkml_model/__init__.py create mode 100644 tests/test_linkml_model/test_linkml_files.py diff --git a/linkml_runtime/linkml_model/linkml_files.py b/linkml_runtime/linkml_model/linkml_files.py index 4fbec839..198b052f 100644 --- a/linkml_runtime/linkml_model/linkml_files.py +++ b/linkml_runtime/linkml_model/linkml_files.py @@ -1,6 +1,8 @@ -import os +from pathlib import Path from enum import Enum, auto -from typing import Optional, Union +from typing import Dict, Optional, Union, Tuple, NamedTuple +from urllib.parse import urljoin +from dataclasses import dataclass import requests from rdflib import Namespace @@ -9,14 +11,12 @@ LINKML_NAMESPACE = Namespace(LINKML_URL_BASE) GITHUB_IO_BASE = "https://linkml.github.io/linkml-model/" GITHUB_BASE = "https://raw.githubusercontent.com/linkml/linkml-model/" -LOCAL_BASE = os.path.abspath(os.path.dirname(__file__)) +LOCAL_BASE = Path(__file__).parent.resolve() GITHUB_API_BASE = "https://api.github.com/repos/linkml/linkml-model/" GITHUB_RELEASES = GITHUB_BASE + "releases" GITHUB_TAGS = GITHUB_BASE + "tags" - - class _AutoName(Enum): @staticmethod def _generate_next_value_(name, start, count, last_values): @@ -32,42 +32,84 @@ class Source(_AutoName): EXTENSIONS = auto() -class Format(Enum): +class Format(_AutoName): """ LinkML package formats """ - GRAPHQL = "graphql" - HTML = "" - JSON = "json" - JSONLD = "context.jsonld" - JSON_SCHEMA = "schema.json" - NATIVE_JSONLD = "model.context.jsonld" - NATIVE_RDF = "model.ttl" - NATIVE_SHEXC = "model.shex" - NATIVE_SHEXJ = "model.shexj" - OWL = "owl.ttl" - PYTHON = "py" - RDF = "ttl" - SHEXC = "shex" - SHEXJ = "shexj" - YAML = "yaml" - - -class _Path(Enum): + EXCEL = auto() + GRAPHQL = auto() + JSON = auto() + JSONLD = auto() + JSON_SCHEMA = auto() + NATIVE_JSONLD = auto() + NATIVE_RDF = auto() + NATIVE_SHEXC = auto() + NATIVE_SHEXJ = auto() + OWL = auto() + PREFIXMAP = auto() + PROTOBUF = auto() + PYTHON = auto() + RDF = auto() + SHACL = auto() + SHEXC = auto() + SHEXJ = auto() + SQLDDL = auto() + SQLSCHEMA = auto() + YAML = auto() + +@dataclass +class FormatPath: + path: str + extension: str + + def model_path(self, model:str) -> Path: + return (Path(self.path) / model).with_suffix(self.extension) + +class _Path: """ LinkML Relative paths""" - GRAPHQL = "graphql" - HTML = "docs" - JSON = "json" - JSONLD = "jsonld" - JSON_SCHEMA = "jsonschema" - NATIVE_JSONLD = "jsonld" - NATIVE_RDF = "ttl" - NATIVE_SHEXC = "shex" - NATIVE_SHEXJ = "shex" - OWL = "owl" - PYTHON = "linkml_model" - RDF = "rdf" - SHEXC = "shex" - SHEXJ = "shex" - YAML = os.path.join("model", "schema") + EXCEL = FormatPath("excel","xlsx" ) + GRAPHQL = FormatPath("graphql","graphql" ) + JSON = FormatPath("json","json" ) + JSONLD = FormatPath("jsonld","context.jsonld" ) + JSON_SCHEMA = FormatPath("jsonschema", "schema.json" ) + NATIVE_JSONLD = FormatPath("jsonld", "context.jsonld" ) + NATIVE_RDF = FormatPath("rdf","ttl" ) + NATIVE_SHEXC = FormatPath("shex","shex" ) + NATIVE_SHEXJ = FormatPath("shex","shexj" ) + OWL = FormatPath("owl","owl.ttl" ) + PREFIXMAP = FormatPath('prefixmap','yaml' ) + PROTOBUF = FormatPath("protobuf","proto" ) + PYTHON = FormatPath("","py" ) + RDF = FormatPath("rdf","ttl" ) + SHACL = FormatPath("shacl","shacl.ttl" ) + SHEXC = FormatPath("shex","shex" ) + SHEXJ = FormatPath("shex","shexj" ) + SQLDDL = FormatPath("sqlddl","sql" ) + SQLSCHEMA = FormatPath("sqlschema","sql" ) + YAML = FormatPath(str(Path("model") / "schema"),"yaml" ) + + @classmethod + def items(cls) -> Dict[str, FormatPath]: + return {k:v for k,v in cls.__dict__.items() if not k.startswith('_')} + + @classmethod + def get(cls, item:Union[str,Format]) -> FormatPath: + if isinstance(item, Format): + item = item.name.upper() + return getattr(cls, item) + + def __class_getitem__(cls, item:str) -> FormatPath: + return getattr(cls, item) + + +META_ONLY = ( + Format.EXCEL, + Format.GRAPHQL, + Format.OWL, + Format.PREFIXMAP, + Format.PROTOBUF, + Format.SHACL, + Format.SQLDDL, + Format.SQLSCHEMA +) class ReleaseTag(_AutoName): @@ -78,13 +120,25 @@ class ReleaseTag(_AutoName): CURRENT = auto() -def _build_path(source: Source, fmt: Format) -> str: - """ Create the relative path for source and fmt """ - return os.path.join(_Path[fmt.name].value, f"{source.value}.{fmt.value}") +class PathParts(NamedTuple): + format: str + file: str + + +def _build_path(source: Source, fmt: Format) -> PathParts: + """ + Create the parts for a relative path for source and fmt. + Combined elsewhere into a complete path, since OS paths and URLs differ. + """ + fmt_path: FormatPath = _Path.get(fmt.name) + return PathParts(fmt_path.path, f"{source.value}.{fmt_path.extension}") def _build_loc(base: str, source: Source, fmt: Format) -> str: - return f"{base}{_build_path(source, fmt)}".replace('blob/', '') + """A github location""" + # urls are always forward slash separated, so hardcoding is appropriate here + path = '/'.join(_build_path(source, fmt)) + return urljoin(base, path).replace('blob/', '') def URL_FOR(source: Source, fmt: Format) -> str: @@ -93,11 +147,12 @@ def URL_FOR(source: Source, fmt: Format) -> str: def LOCAL_PATH_FOR(source: Source, fmt: Format) -> str: - return os.path.join(LOCAL_BASE, _build_path(source, fmt)) + return str(LOCAL_BASE.joinpath(*_build_path(source, fmt))) -def GITHUB_IO_PATH_FOR(source: Source, fmt: Format) -> str: - return _build_loc(GITHUB_IO_BASE, source, fmt) +def GITHUB_IO_PATH_FOR(source: Source, fmt: Format, version="latest") -> str: + path = '/'.join([version, 'linkml_model', *_build_path(source, fmt)]) + return urljoin(GITHUB_IO_BASE, path) def GITHUB_PATH_FOR(source: Source, @@ -122,7 +177,8 @@ def tag_to_commit(tag: str) -> str: # Return the absolute latest entry for branch if release is ReleaseTag.LATEST or (release is ReleaseTag.CURRENT and branch != "main"): - return f"{GITHUB_BASE}{branch}/{_build_path(source, fmt)}" + path = '/'.join([branch, 'linkml_model', *_build_path(source, fmt)]) + return urljoin(GITHUB_BASE, path) # Return the latest published version elif release is ReleaseTag.CURRENT: @@ -139,9 +195,10 @@ class ModelLoc: def __init__(self, model: Source, fmt: Format) -> str: self._model = model self._format = fmt + self._fmt_path = _Path.get(fmt.name) def __str__(self): - return f"{self._model.value}.{self._format.value}" + return f"{self._model.value}.{self._fmt_path.extension}" def __repr__(self): return str(self) @@ -171,18 +228,10 @@ def __str__(self): def __repr__(self): return str(self) - @property - def yaml(self) -> ModelLoc: - return ModelFile.ModelLoc(self._model, Format.YAML) - @property def graphql(self) -> ModelLoc: return ModelFile.ModelLoc(self._model, Format.GRAPHQL) - @property - def html(self) -> ModelLoc: - return ModelFile.ModelLoc(self._model, Format.HTML) - @property def json(self) -> ModelLoc: return ModelFile.ModelLoc(self._model, Format.JSON) diff --git a/tests/test_linkml_model/__init__.py b/tests/test_linkml_model/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_linkml_model/test_linkml_files.py b/tests/test_linkml_model/test_linkml_files.py new file mode 100644 index 00000000..d4a47560 --- /dev/null +++ b/tests/test_linkml_model/test_linkml_files.py @@ -0,0 +1,125 @@ +import pdb + +import pytest +import requests +from pathlib import Path +from itertools import product +from urllib.parse import urlparse + +from linkml_runtime.linkml_model.linkml_files import ( + Source, + Format, + _Path, + URL_FOR, + LOCAL_PATH_FOR, + LOCAL_BASE, + GITHUB_IO_PATH_FOR, + GITHUB_PATH_FOR, + META_ONLY, + ReleaseTag +) + +EXPECTED_FORMATS = [ + (source, fmt) for source, fmt in product(Source, Format) + if (fmt not in META_ONLY and source != Source.META) +] + +@pytest.mark.parametrize( + 'source,fmt', + EXPECTED_FORMATS +) +def test_local_paths(source, fmt): + a_path = Path(LOCAL_PATH_FOR(source, fmt)) + assert a_path.exists() + assert a_path.is_absolute() + +@pytest.mark.parametrize( + 'fmt', + Format.__iter__() +) +def test_format_paths(fmt): + """Every format should have an entry in _Path""" + assert fmt.name in _Path.items() + +def test_no_unmapped_dirs(): + """ + There should be no additional directories that don't have a mapping for Format. + """ + EXCLUDES = ('__pycache__',) + + expected = {LOCAL_BASE / _Path.get(fmt.name).path for fmt in Format} + expected.add(LOCAL_BASE / 'model') + + actual = {a_dir for a_dir in LOCAL_BASE.iterdir() if a_dir.is_dir() and a_dir.name not in EXCLUDES} + # Special case the root directory + actual.add(LOCAL_BASE) + # Special case YAML which is in a subdirectory - we've checked for existence above + actual.add(LOCAL_BASE / _Path.get('YAML').path) + assert expected == actual + + +# -------------------------------------------------- +# URLs +# -------------------------------------------------- + +@pytest.mark.skip('We need to cache this...') +@pytest.mark.parametrize( + 'release_type', + ReleaseTag.__iter__() +) +@pytest.mark.parametrize( + 'source,fmt', + EXPECTED_FORMATS +) +def test_github_path_exists(source,fmt, release_type): + url = GITHUB_PATH_FOR(source, fmt, release_type) + res = requests.get(url) + assert res.status_code != 404 + + +@pytest.mark.parametrize( + 'release_type', + ReleaseTag.__iter__() +) +@pytest.mark.parametrize( + 'source,fmt', + EXPECTED_FORMATS +) +def test_github_path_format(source,fmt, release_type): + if release_type == ReleaseTag.CURRENT: + pytest.skip("Need to cache network requests for this") + + url = GITHUB_PATH_FOR(source, fmt, release_type) + # ensure it parses + assert urlparse(url) + # for windows... + assert '\\' not in url + + +@pytest.mark.skip("Need to cache this") +@pytest.mark.parametrize( + 'source,fmt', + EXPECTED_FORMATS +) +def test_github_io_path(source,fmt): + url = GITHUB_IO_PATH_FOR(source, fmt) + res = requests.get(url) + assert res.status_code != 404 + + +@pytest.mark.skip('Need to cache this') +@pytest.mark.parametrize( + 'source,fmt', + EXPECTED_FORMATS +) +def test_url_for_format(source,fmt): + url = URL_FOR(source, fmt) + res = requests.get(url) + assert res.status_code != 404 + + + + + + + From 32bd5e834a2fcb8aa71c3ae6db04975cc3d887ee Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Wed, 20 Mar 2024 18:32:36 -0700 Subject: [PATCH 3/4] use skipif for requests_cache --- tests/test_linkml_model/test_linkml_files.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_linkml_model/test_linkml_files.py b/tests/test_linkml_model/test_linkml_files.py index d4a47560..55576747 100644 --- a/tests/test_linkml_model/test_linkml_files.py +++ b/tests/test_linkml_model/test_linkml_files.py @@ -6,6 +6,12 @@ from itertools import product from urllib.parse import urlparse +try: + import requests_cache + HAVE_REQUESTS_CACHE = True +except ImportError: + HAVE_REQUESTS_CACHE = False + from linkml_runtime.linkml_model.linkml_files import ( Source, Format, @@ -62,7 +68,7 @@ def test_no_unmapped_dirs(): # URLs # -------------------------------------------------- -@pytest.mark.skip('We need to cache this...') +@pytest.mark.skipif(not HAVE_REQUESTS_CACHE, reason='We need to cache this...') @pytest.mark.parametrize( 'release_type', ReleaseTag.__iter__() @@ -96,7 +102,7 @@ def test_github_path_format(source,fmt, release_type): assert '\\' not in url -@pytest.mark.skip("Need to cache this") +@pytest.mark.skipif(not HAVE_REQUESTS_CACHE,reason= "Need to cache this") @pytest.mark.parametrize( 'source,fmt', EXPECTED_FORMATS @@ -107,7 +113,7 @@ def test_github_io_path(source,fmt): assert res.status_code != 404 -@pytest.mark.skip('Need to cache this') +@pytest.mark.skipif(not HAVE_REQUESTS_CACHE,reason= 'Need to cache this') @pytest.mark.parametrize( 'source,fmt', EXPECTED_FORMATS From c0720245377c4fb1b7b39a1f9672562ad7982f12 Mon Sep 17 00:00:00 2001 From: sneakers-the-rat Date: Thu, 21 Mar 2024 05:17:16 -0700 Subject: [PATCH 4/4] remove pdb import, fix conditional for checking if meta when meta only --- tests/test_linkml_model/test_linkml_files.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_linkml_model/test_linkml_files.py b/tests/test_linkml_model/test_linkml_files.py index 55576747..189f1fc3 100644 --- a/tests/test_linkml_model/test_linkml_files.py +++ b/tests/test_linkml_model/test_linkml_files.py @@ -1,5 +1,3 @@ -import pdb - import pytest import requests from pathlib import Path @@ -27,7 +25,7 @@ EXPECTED_FORMATS = [ (source, fmt) for source, fmt in product(Source, Format) - if (fmt not in META_ONLY and source != Source.META) + if (fmt not in META_ONLY or source == Source.META) ] @pytest.mark.parametrize(