Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SCHEMA: Add file rule for phenotype tables #1672

Merged
merged 7 commits into from
May 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/metaschema.json
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,10 @@
"type": "object",
"properties": {
"level": { "enum": ["optional", "recommended", "required"] },
"datatypes": {
"type": "array",
"items": { "pattern": "^[a-z]+$" }
},
"stem": { "type": "string" },
"extensions": { "type": "array", "items": { "type": "string" } }
},
Expand Down
13 changes: 13 additions & 0 deletions src/schema/rules/files/common/tables.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,16 @@ sessions: # This file may only exist if session is present in the dataset.
- .json
entities:
subject: required

# Phenotype is a special case where there are no applicable entities, but a
# parent directory is specified. This most closely matches datatype in the current
# structure. We also require a stem that can match any value, as there are no
# constraints on the filename except extension.
phenotype:
  level: optional
  datatypes:
    - phenotype
  stem: '*'
  extensions:
    - .tsv
    - .json
1 change: 1 addition & 0 deletions tools/schemacode/bidsschematools/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"qmri_tb1tfl", # fmap, _TB1TFL
"qmri_vfa", # derivatives
"ds000248", # .bidsignore
"fnirs_automaticity", # phenotypic
]
# Errors are described in the README of the respective datasets:
# https://github.com/bids-standard/bids-error-examples
Expand Down
5 changes: 5 additions & 0 deletions tools/schemacode/bidsschematools/data/tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ def test_rule_objects(schema_obj):

# Build a list of items mentioned in rules, but not found in objects.
if use not in object_values:
if (use, object_type) == ("phenotype", "datatypes"):
# Special case: phenotype is a top-level directory
# that acts like a datatype, but we don't want to
# define it that way in the glossary, currently.
continue
temp_path = path[:]
if is_list:
temp_path[-1] += f"[{i_use}]"
Expand Down
20 changes: 15 additions & 5 deletions tools/schemacode/bidsschematools/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
``schema.rules.files``.
"""

import fnmatch
import re
import typing as ty
from collections.abc import Mapping
Expand Down Expand Up @@ -125,7 +126,7 @@ def _entity_rule(rule: Mapping, schema: bst.types.Namespace):
ext_regex = f"(?P<extension>{ext_match})"

return {
"regex": "".join(dir_regex + entity_regex + [suffix_regex, ext_regex]),
"regex": "".join(dir_regex + entity_regex + [suffix_regex, ext_regex, r"\Z"]),
"mandatory": False,
}

Expand Down Expand Up @@ -170,15 +171,24 @@ def _sanitize_extension(ext: str) -> str:


def _stem_rule(rule: bst.types.Namespace):
    """Build the filename regex for a stem-based file rule.

    The rule's ``stem`` is treated as a shell-style glob and translated with
    :func:`fnmatch.translate`, so a literal stem such as ``README`` matches
    only itself while ``*`` matches any value. If the rule lists
    ``datatypes``, the pattern is prefixed with a ``datatype`` directory
    component. Each entry of ``extensions`` is passed through
    ``_sanitize_extension`` and the results are joined as alternatives.

    Parameters
    ----------
    rule
        Rule with ``stem``, ``extensions`` and ``level`` attributes, and an
        optional ``datatypes`` sequence.

    Returns
    -------
    dict
        ``{"regex": <pattern>, "mandatory": <bool>}``. The pattern exposes the
        named groups ``datatype`` (only when datatypes are given), ``stem``
        and ``extension`` and is anchored at the end of the string.
        ``mandatory`` is true only when ``rule.level == "required"``.
    """
    # translate includes a trailing two-character end-of-string anchor,
    # but we expect extensions to follow the stem, so strip it here and
    # re-anchor after the extension group instead.
    stem_match = fnmatch.translate(rule.stem)[:-2]
    stem_regex = f"(?P<stem>{stem_match})"

    # De-duplicate while preserving schema order so that the generated
    # pattern string is stable from run to run.
    dtypes = list(dict.fromkeys(rule.get("datatypes", ())))
    dir_regex = f"(?P<datatype>{'|'.join(dtypes)})/" if dtypes else ""

    ext_match = "|".join(_sanitize_extension(ext) for ext in rule.extensions)
    ext_regex = rf"(?P<extension>{ext_match})\Z"

    return {"regex": dir_regex + stem_regex + ext_regex, "mandatory": rule.level == "required"}


def _path_rule(rule: bst.types.Namespace):
    """Build the regex for a rule that names one exact path.

    Parameters
    ----------
    rule
        Rule with a ``path`` attribute (matched literally) and a ``level``
        attribute.

    Returns
    -------
    dict
        ``{"regex": <pattern>, "mandatory": <bool>}``. The pattern captures
        the literal path in the named group ``path``, optionally followed by
        ``/`` and anything beneath it, and is anchored at the end of the
        string. ``mandatory`` is true only when ``rule.level == "required"``.
    """
    path_match = re.escape(rule.path)
    # Exact path matches may be files or opaque directories
    # Consider using rules.directories to identify opaque directories
    return {"regex": rf"(?P<path>{path_match})(?:/.*)?\Z", "mandatory": rule.level == "required"}


def regexify_filename_rules(
Expand Down
26 changes: 20 additions & 6 deletions tools/schemacode/bidsschematools/tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_entity_rule(schema_obj):
r"sub-(?P=subject)_"
r"(?:ses-(?P=session)_)?"
r"(?P<suffix>T1w)"
r"(?P<extension>\.nii)"
r"(?P<extension>\.nii)\Z"
),
"mandatory": False,
}
Expand All @@ -43,7 +43,7 @@ def test_entity_rule(schema_obj):
r"(?:sub-(?P=subject)_)?"
r"(?:ses-(?P=session)_)?"
r"(?P<suffix>T1w)"
r"(?P<extension>\.json)"
r"(?P<extension>\.json)\Z"
),
"mandatory": False,
}
Expand Down Expand Up @@ -84,28 +84,42 @@ def test_split_inheritance_rules():
def test_stem_rule():
    """Check the regexes that ``rules._stem_rule`` builds for stem rules."""
    # Literal stem; the empty extension is one of the allowed alternatives
    rule = Namespace.build({"stem": "README", "level": "required", "extensions": ["", ".md"]})
    assert rules._stem_rule(rule) == {
        "regex": r"(?P<stem>(?s:README))(?P<extension>|\.md)\Z",
        "mandatory": True,
    }

    # Literal stem with an optional level
    rule = Namespace.build(
        {"stem": "participants", "level": "optional", "extensions": [".tsv", ".json"]}
    )
    assert rules._stem_rule(rule) == {
        "regex": r"(?P<stem>(?s:participants))(?P<extension>\.tsv|\.json)\Z",
        "mandatory": False,
    }

    # Wildcard stem, with datatype
    rule = Namespace.build(
        {
            "stem": "*",
            "datatypes": ["phenotype"],
            "level": "optional",
            "extensions": [".tsv", ".json"],
        }
    )
    assert rules._stem_rule(rule) == {
        "regex": r"(?P<datatype>phenotype)/(?P<stem>(?s:.*))(?P<extension>\.tsv|\.json)\Z",
        "mandatory": False,
    }


def test_path_rule():
    """Check the regexes that ``rules._path_rule`` builds for exact-path rules."""
    # Required path: special characters in the path are escaped
    rule = Namespace.build({"path": "dataset_description.json", "level": "required"})
    assert rules._path_rule(rule) == {
        "regex": r"(?P<path>dataset_description\.json)(?:/.*)?\Z",
        "mandatory": True,
    }

    # Optional path: only the mandatory flag differs
    rule = Namespace.build({"path": "LICENSE", "level": "optional"})
    assert rules._path_rule(rule) == {
        "regex": r"(?P<path>LICENSE)(?:/.*)?\Z",
        "mandatory": False,
    }


def test_regexify_all():
Expand Down