Skip to content

Commit

Permalink
Merge pull request #310 from sneakers-the-rat/bugfix-windows-paths
Browse files Browse the repository at this point in the history
Fix `linkml_files`
  • Loading branch information
cmungall authored Mar 28, 2024
2 parents b01abb4 + c072024 commit 1af93fc
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 56 deletions.
161 changes: 105 additions & 56 deletions linkml_runtime/linkml_model/linkml_files.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from pathlib import Path
from enum import Enum, auto
from typing import Optional, Union
from typing import Dict, Optional, Union, Tuple, NamedTuple
from urllib.parse import urljoin
from dataclasses import dataclass

import requests
from rdflib import Namespace
Expand All @@ -9,14 +11,12 @@
LINKML_NAMESPACE = Namespace(LINKML_URL_BASE)
GITHUB_IO_BASE = "https://linkml.github.io/linkml-model/"
GITHUB_BASE = "https://raw.githubusercontent.com/linkml/linkml-model/"
LOCAL_BASE = os.path.abspath(os.path.dirname(__file__))
LOCAL_BASE = Path(__file__).parent.resolve()
GITHUB_API_BASE = "https://api.github.com/repos/linkml/linkml-model/"
GITHUB_RELEASES = GITHUB_BASE + "releases"
GITHUB_TAGS = GITHUB_BASE + "tags"




class _AutoName(Enum):
@staticmethod
def _generate_next_value_(name, start, count, last_values):
Expand All @@ -32,42 +32,84 @@ class Source(_AutoName):
EXTENSIONS = auto()


class Format(Enum):
class Format(_AutoName):
""" LinkML package formats """
GRAPHQL = "graphql"
HTML = ""
JSON = "json"
JSONLD = "context.jsonld"
JSON_SCHEMA = "schema.json"
NATIVE_JSONLD = "model.context.jsonld"
NATIVE_RDF = "model.ttl"
NATIVE_SHEXC = "model.shex"
NATIVE_SHEXJ = "model.shexj"
OWL = "owl.ttl"
PYTHON = "py"
RDF = "ttl"
SHEXC = "shex"
SHEXJ = "shexj"
YAML = "yaml"


class _Path(Enum):
EXCEL = auto()
GRAPHQL = auto()
JSON = auto()
JSONLD = auto()
JSON_SCHEMA = auto()
NATIVE_JSONLD = auto()
NATIVE_RDF = auto()
NATIVE_SHEXC = auto()
NATIVE_SHEXJ = auto()
OWL = auto()
PREFIXMAP = auto()
PROTOBUF = auto()
PYTHON = auto()
RDF = auto()
SHACL = auto()
SHEXC = auto()
SHEXJ = auto()
SQLDDL = auto()
SQLSCHEMA = auto()
YAML = auto()

@dataclass
class FormatPath:
    """
    Relative location of the files generated for one format.

    ``path`` is the directory part and ``extension`` is the file extension
    *without* a leading dot. The extension may itself contain dots
    (e.g. ``"context.jsonld"``).
    """
    path: str
    extension: str

    def model_path(self, model: str) -> Path:
        """
        Return the relative path ``<path>/<model>.<extension>``.

        The extension is appended rather than applied with
        ``Path.with_suffix``: ``with_suffix`` rejects suffixes that do not
        start with a dot (raising ``ValueError`` for e.g. ``"xlsx"``) and
        would replace anything after a dot already present in ``model``.
        """
        return Path(self.path) / f"{model}.{self.extension}"

class _Path:
""" LinkML Relative paths"""
GRAPHQL = "graphql"
HTML = "docs"
JSON = "json"
JSONLD = "jsonld"
JSON_SCHEMA = "jsonschema"
NATIVE_JSONLD = "jsonld"
NATIVE_RDF = "ttl"
NATIVE_SHEXC = "shex"
NATIVE_SHEXJ = "shex"
OWL = "owl"
PYTHON = "linkml_model"
RDF = "rdf"
SHEXC = "shex"
SHEXJ = "shex"
YAML = "model/schema"
EXCEL = FormatPath("excel","xlsx" )
GRAPHQL = FormatPath("graphql","graphql" )
JSON = FormatPath("json","json" )
JSONLD = FormatPath("jsonld","context.jsonld" )
JSON_SCHEMA = FormatPath("jsonschema", "schema.json" )
NATIVE_JSONLD = FormatPath("jsonld", "context.jsonld" )
NATIVE_RDF = FormatPath("rdf","ttl" )
NATIVE_SHEXC = FormatPath("shex","shex" )
NATIVE_SHEXJ = FormatPath("shex","shexj" )
OWL = FormatPath("owl","owl.ttl" )
PREFIXMAP = FormatPath('prefixmap','yaml' )
PROTOBUF = FormatPath("protobuf","proto" )
PYTHON = FormatPath("","py" )
RDF = FormatPath("rdf","ttl" )
SHACL = FormatPath("shacl","shacl.ttl" )
SHEXC = FormatPath("shex","shex" )
SHEXJ = FormatPath("shex","shexj" )
SQLDDL = FormatPath("sqlddl","sql" )
SQLSCHEMA = FormatPath("sqlschema","sql" )
YAML = FormatPath(str(Path("model") / "schema"),"yaml" )

@classmethod
def items(cls) -> Dict[str, FormatPath]:
    """
    Return a mapping of format name to its :class:`FormatPath`.

    Only class attributes that actually hold a ``FormatPath`` are included.
    Filtering on the leading underscore alone would also return the public
    classmethods defined on this class (``items`` and ``get``), which are
    not format entries.
    """
    return {
        name: value
        for name, value in vars(cls).items()
        if not name.startswith('_') and isinstance(value, FormatPath)
    }

@classmethod
def get(cls, item: Union[str, Format]) -> FormatPath:
    """
    Look up the :class:`FormatPath` for ``item``.

    ``item`` may be a ``Format`` member (its upper-cased name is used as the
    attribute name) or the attribute name itself as a string.
    """
    attr_name = item.name.upper() if isinstance(item, Format) else item
    return getattr(cls, attr_name)

def __class_getitem__(cls, item:str) -> FormatPath:
    """Allow ``_Path[name]`` subscription; returns the class attribute named ``item``."""
    return getattr(cls, item)


# Formats that the test-suite expects to exist only for ``Source.META``;
# every format not listed here is expected for all sources.
META_ONLY = (
Format.EXCEL,
Format.GRAPHQL,
Format.OWL,
Format.PREFIXMAP,
Format.PROTOBUF,
Format.SHACL,
Format.SQLDDL,
Format.SQLSCHEMA
)


class ReleaseTag(_AutoName):
Expand All @@ -78,13 +120,25 @@ class ReleaseTag(_AutoName):
CURRENT = auto()


def _build_path(source: Source, fmt: Format) -> str:
""" Create the relative path for source and fmt """
return f"{_Path[fmt.name].value}/{source.value}.{fmt.value}"
class PathParts(NamedTuple):
    """The two components of a relative model location, kept separate so callers can join them with the separator they need."""
    # directory part of the location (taken from ``FormatPath.path``)
    format: str
    # file name, i.e. ``<source>.<extension>``
    file: str


def _build_path(source: Source, fmt: Format) -> PathParts:
    """
    Return the directory and file name for ``source`` rendered as ``fmt``.

    The two parts are deliberately left unjoined: filesystem paths and URLs
    use different separators, so each caller combines them itself.
    """
    location: FormatPath = _Path.get(fmt.name)
    file_name = f"{source.value}.{location.extension}"
    return PathParts(location.path, file_name)


def _build_loc(base: str, source: Source, fmt: Format) -> str:
return f"{base}{_build_path(source, fmt)}".replace('blob/', '')
"""A github location"""
# urls are always forward slash separated, so hardcoding is appropriate here
path = '/'.join(_build_path(source, fmt))
return urljoin(base, path).replace('blob/', '')


def URL_FOR(source: Source, fmt: Format) -> str:
Expand All @@ -93,11 +147,12 @@ def URL_FOR(source: Source, fmt: Format) -> str:


def LOCAL_PATH_FOR(source: Source, fmt: Format) -> str:
return os.path.join(LOCAL_BASE, _build_path(source, fmt))
return str(LOCAL_BASE.joinpath(*_build_path(source, fmt)))


def GITHUB_IO_PATH_FOR(source: Source, fmt: Format) -> str:
return _build_loc(GITHUB_IO_BASE, source, fmt)
def GITHUB_IO_PATH_FOR(source: Source, fmt: Format, version="latest") -> str:
path = '/'.join([version, 'linkml_model', *_build_path(source, fmt)])
return urljoin(GITHUB_IO_BASE, path)


def GITHUB_PATH_FOR(source: Source,
Expand All @@ -122,7 +177,8 @@ def tag_to_commit(tag: str) -> str:

# Return the absolute latest entry for branch
if release is ReleaseTag.LATEST or (release is ReleaseTag.CURRENT and branch != "main"):
return f"{GITHUB_BASE}{branch}/{_build_path(source, fmt)}"
path = '/'.join([branch, 'linkml_model', *_build_path(source, fmt)])
return urljoin(GITHUB_BASE, path)

# Return the latest published version
elif release is ReleaseTag.CURRENT:
Expand All @@ -139,9 +195,10 @@ class ModelLoc:
def __init__(self, model: Source, fmt: Format) -> str:
self._model = model
self._format = fmt
self._fmt_path = _Path.get(fmt.name)

def __str__(self):
return f"{self._model.value}.{self._format.value}"
return f"{self._model.value}.{self._fmt_path.extension}"

def __repr__(self):
return str(self)
Expand Down Expand Up @@ -171,18 +228,10 @@ def __str__(self):
def __repr__(self):
return str(self)

@property
def yaml(self) -> ModelLoc:
return ModelFile.ModelLoc(self._model, Format.YAML)

@property
def graphql(self) -> ModelLoc:
return ModelFile.ModelLoc(self._model, Format.GRAPHQL)

@property
def html(self) -> ModelLoc:
return ModelFile.ModelLoc(self._model, Format.HTML)

@property
def json(self) -> ModelLoc:
return ModelFile.ModelLoc(self._model, Format.JSON)
Expand Down
Empty file.
129 changes: 129 additions & 0 deletions tests/test_linkml_model/test_linkml_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import pytest
import requests
from pathlib import Path
from itertools import product
from urllib.parse import urlparse

try:
import requests_cache
HAVE_REQUESTS_CACHE = True
except ImportError:
HAVE_REQUESTS_CACHE = False

from linkml_runtime.linkml_model.linkml_files import (
Source,
Format,
_Path,
URL_FOR,
LOCAL_PATH_FOR,
LOCAL_BASE,
GITHUB_IO_PATH_FOR,
GITHUB_PATH_FOR,
META_ONLY,
ReleaseTag
)

# Every (source, format) combination that should exist: formats listed in
# META_ONLY are only expected for the META source.
EXPECTED_FORMATS = [
    pair for pair in product(Source, Format)
    if pair[1] not in META_ONLY or pair[0] == Source.META
]

@pytest.mark.parametrize('source,fmt', EXPECTED_FORMATS)
def test_local_paths(source, fmt):
    """Each expected (source, format) pair resolves to an existing, absolute local file."""
    local_file = Path(LOCAL_PATH_FOR(source, fmt))
    assert local_file.exists()
    assert local_file.is_absolute()

@pytest.mark.parametrize('fmt', list(Format))
def test_format_paths(fmt):
    """Every member of Format must have a same-named entry in _Path."""
    known_names = _Path.items()
    assert fmt.name in known_names

def test_no_unmapped_dirs():
    """
    The directories under LOCAL_BASE must be exactly those that some Format maps to.
    """
    ignored_names = ('__pycache__',)

    # Directories the Format -> _Path mapping says should exist
    mapped_dirs = set()
    for fmt in Format:
        mapped_dirs.add(LOCAL_BASE / _Path.get(fmt.name).path)
    mapped_dirs.add(LOCAL_BASE / 'model')

    # Directories that are actually present on disk
    found_dirs = set()
    for entry in LOCAL_BASE.iterdir():
        if entry.is_dir() and entry.name not in ignored_names:
            found_dirs.add(entry)
    # The root directory itself is special-cased
    found_dirs.add(LOCAL_BASE)
    # YAML lives in a nested directory that iterdir() does not list directly
    found_dirs.add(LOCAL_BASE / _Path.get('YAML').path)

    assert mapped_dirs == found_dirs


# --------------------------------------------------
# URLs
# --------------------------------------------------

@pytest.mark.skipif(not HAVE_REQUESTS_CACHE, reason='We need to cache this...')
@pytest.mark.parametrize('release_type', list(ReleaseTag))
@pytest.mark.parametrize('source,fmt', EXPECTED_FORMATS)
def test_github_path_exists(source, fmt, release_type):
    """The GitHub URL built for each source/format/release must not be a 404."""
    url = GITHUB_PATH_FOR(source, fmt, release_type)
    # requests never times out by default; bound the wait so an unresponsive
    # server fails the test instead of hanging the whole run
    res = requests.get(url, timeout=30)
    assert res.status_code != 404


@pytest.mark.parametrize('release_type', list(ReleaseTag))
@pytest.mark.parametrize('source,fmt', EXPECTED_FORMATS)
def test_github_path_format(source, fmt, release_type):
    """The GitHub URL must be a well-formed absolute URL with no backslashes."""
    if release_type == ReleaseTag.CURRENT:
        pytest.skip("Need to cache network requests for this")

    url = GITHUB_PATH_FOR(source, fmt, release_type)
    # urlparse() returns a non-empty tuple, which is always truthy, so
    # asserting on the result itself checks nothing; inspect its fields
    parsed = urlparse(url)
    assert parsed.scheme in ('http', 'https')
    assert parsed.netloc
    # for windows: OS path separators must never leak into a URL
    assert '\\' not in url


@pytest.mark.skipif(not HAVE_REQUESTS_CACHE, reason="Need to cache this")
@pytest.mark.parametrize('source,fmt', EXPECTED_FORMATS)
def test_github_io_path(source, fmt):
    """The github.io URL built for each source/format must not be a 404."""
    url = GITHUB_IO_PATH_FOR(source, fmt)
    # requests never times out by default; bound the wait so an unresponsive
    # server fails the test instead of hanging the whole run
    res = requests.get(url, timeout=30)
    assert res.status_code != 404


@pytest.mark.skipif(not HAVE_REQUESTS_CACHE, reason='Need to cache this')
@pytest.mark.parametrize('source,fmt', EXPECTED_FORMATS)
def test_url_for_format(source, fmt):
    """The URL returned by URL_FOR for each source/format must not be a 404."""
    url = URL_FOR(source, fmt)
    # requests never times out by default; bound the wait so an unresponsive
    # server fails the test instead of hanging the whole run
    res = requests.get(url, timeout=30)
    assert res.status_code != 404







0 comments on commit 1af93fc

Please sign in to comment.