
Commit

Merge pull request #337 from PennLINC/bkchg/schema_validator
Replace legacy validator with schema validator
tientong98 authored Dec 13, 2024
2 parents c16650b + 445533a commit ea0405d
Showing 9 changed files with 347 additions and 91 deletions.
11 changes: 5 additions & 6 deletions .circleci/config.yml
@@ -1,6 +1,6 @@
version: 2.1
orbs:
- codecov: codecov/codecov@1.0.5
+ codecov: codecov/codecov@3.2.4

jobs:
run_pytests:
@@ -31,10 +31,8 @@ jobs:
source activate cubids
conda install -c conda-forge -y datalad
- # Add nodejs and the validator
- conda install nodejs
- npm install -g yarn && \
- npm install -g [email protected]
+ # Add deno to run the schema validator
+ conda install deno
# Install CuBIDS
pip install -e .[tests]
@@ -64,7 +62,8 @@
# We need curl for the codecov upload
apt-get update
- apt-get install -yqq curl
+ apt-get install -y -qq curl
+ apt-get install -y gnupg
cd /home/circleci/src/coverage/
echo "Merge coverage files"
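The CI switch from nodejs/npm to deno reflects the move from the legacy npm-packaged bids-validator to the schema-based validator, which runs on deno. For orientation, a minimal sketch of driving that validator through this commit's cubids.validator helpers (usage mirrors the tests further down; the dataset path is a placeholder):

# Sketch only: run the schema validator on a BIDS dataset via cubids.validator.
# "/path/to/bids" stands in for a real dataset root.
from cubids.validator import build_validator_call, parse_validator_output, run_validator

call = build_validator_call("/path/to/bids")  # assemble the validator command
ret = run_validator(call)                     # run it as a subprocess
if ret.returncode == 0:
    # parse_validator_output returns a pandas DataFrame of issues (see tests below)
    issues = parse_validator_output(ret.stdout.decode("UTF-8"))
    print(issues)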
50 changes: 43 additions & 7 deletions cubids/cli.py
@@ -43,7 +43,7 @@ def _parse_validate():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -107,6 +107,41 @@ def _enter_validate(argv=None):
workflows.validate(**args)


def _parse_bids_version():
parser = argparse.ArgumentParser(
description="cubids bids-version: Get BIDS Validator and Schema version",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
PathExists = partial(_path_exists, parser=parser)

parser.add_argument(
"bids_dir",
type=PathExists,
action="store",
help=(
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
parser.add_argument(
"--write",
action="store_true",
default=False,
help=(
"Save the validator and schema version to 'dataset_description.json' "
"when using `cubids bids-version /bids/path --write`. "
"By default, `cubids bids-version /bids/path` prints to the terminal."
),
)
return parser


def _enter_bids_version(argv=None):
options = _parse_bids_version().parse_args(argv)
args = vars(options).copy()
workflows.bids_version(**args)


def _parse_bids_sidecar_merge():
parser = argparse.ArgumentParser(
description=("bids-sidecar-merge: merge critical keys from one sidecar to another"),
@@ -153,7 +188,7 @@ def _parse_group():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -220,7 +255,7 @@ def _parse_apply():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -316,7 +351,7 @@ def _parse_datalad_save():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -358,7 +393,7 @@ def _parse_undo():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -582,7 +617,7 @@ def _parse_remove_metadata_fields():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -628,7 +663,7 @@ def _parse_print_metadata_fields():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
@@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None):

COMMANDS = [
("validate", _parse_validate, workflows.validate),
("bids-version", _parse_bids_version, workflows.bids_version),
("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge),
("group", _parse_group, workflows.group),
("apply", _parse_apply, workflows.apply),
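The new bids-version subcommand registered above is thin plumbing: _enter_bids_version parses the arguments and forwards them to workflows.bids_version, which prints the validator and schema versions (or saves them to dataset_description.json with --write). A hedged sketch of fetching the same information programmatically with the cubids.validator helpers this commit's tests exercise; the dataset path is a placeholder and the exact --write behavior lives in workflows.bids_version:

# Sketch only: retrieve the versions that `cubids bids-version` reports.
from cubids.validator import (
    build_validator_call,
    run_validator,
    get_bids_validator_version,
    extract_summary_info,
)

ret = run_validator(build_validator_call("/path/to/bids"))
validator_version = get_bids_validator_version()["ValidatorVersion"]
schema_version = extract_summary_info(ret.stdout.decode("UTF-8"))["SchemaVersion"]
print(validator_version, schema_version)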
17 changes: 14 additions & 3 deletions cubids/cubids.py
@@ -1336,9 +1336,20 @@ def get_all_metadata_fields(self):
found_fields = set()
for json_file in Path(self.path).rglob("*.json"):
if ".git" not in str(json_file):
with open(json_file, "r") as jsonr:
metadata = json.load(jsonr)
found_fields.update(metadata.keys())
# add this in case `print-metadata-fields` is run before validate
try:
with open(json_file, "r", encoding="utf-8") as jsonr:
content = jsonr.read().strip()
if not content:
print(f"Empty file: {json_file}")
continue
metadata = json.loads(content)
found_fields.update(metadata.keys())
except json.JSONDecodeError as e:
warnings.warn(f"Error decoding JSON in {json_file}: {e}")
except Exception as e:
warnings.warn(f"Unexpected error with file {json_file}: {e}")

return sorted(found_fields)

def remove_metadata_fields(self, fields_to_remove):
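The change above lets metadata scanning tolerate empty or malformed sidecar JSON instead of crashing, which matters when print-metadata-fields runs before any validation has flagged bad files. A standalone sketch of the same defensive pattern, assuming an arbitrary sidecar path:

# Standalone sketch of the defensive read introduced above: skip empty files,
# warn rather than raise on malformed JSON, and return whatever keys are found.
import json
import warnings
from pathlib import Path

def sidecar_keys(json_file: Path) -> set:
    content = json_file.read_text(encoding="utf-8").strip()
    if not content:
        print(f"Empty file: {json_file}")
        return set()
    try:
        return set(json.loads(content).keys())
    except json.JSONDecodeError as e:
        warnings.warn(f"Error decoding JSON in {json_file}: {e}")
        return set()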
44 changes: 43 additions & 1 deletion cubids/tests/test_bond.py
@@ -9,6 +9,7 @@
import numpy as np
import pandas as pd
import pytest
+ from packaging.version import Version

from cubids.cubids import CuBIDS
from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite
@@ -22,7 +23,15 @@
file_hash,
get_data,
)
- from cubids.validator import build_validator_call, parse_validator_output, run_validator
+ from cubids.validator import (
+     build_validator_call,
+     parse_validator_output,
+     run_validator,
+     get_bids_validator_version,
+     extract_summary_info,
+     update_dataset_description,
+     bids_validator_version,
+ )

COMPLETE_KEY_GROUPS = [
"acquisition-HASC55AP_datatype-dwi_suffix-dwi",
@@ -1028,6 +1037,39 @@ def test_validator(tmp_path):
assert isinstance(parsed, pd.DataFrame)


def test_bids_version(tmp_path):
"""Test workflows.bids_version."""
data_root = get_data(tmp_path)
bids_dir = Path(data_root) / "complete"

# Ensure the test directory exists
assert bids_dir.exists()

# test the validator in valid dataset
call = build_validator_call(bids_dir)
ret = run_validator(call)

assert ret.returncode == 0

decoded = ret.stdout.decode("UTF-8")

# Get the BIDS validator version
validator_version = Version(get_bids_validator_version()["ValidatorVersion"])
# Extract schemaVersion
schema_version = Version(extract_summary_info(decoded)["SchemaVersion"])

# Set baseline versions to compare against
min_validator_version = Version("2.0.0")
min_schema_version = Version("0.11.3")

assert (
validator_version >= min_validator_version
), f"Validator version {validator_version} is less than minimum {min_validator_version}"
assert (
schema_version >= min_schema_version
), f"Schema version {schema_version} is less than minimum {min_schema_version}"


def test_docker():
"""Verify that docker is installed and the user has permission to run docker images.
3 changes: 2 additions & 1 deletion cubids/tests/test_cli.py
@@ -14,9 +14,10 @@
"""

import argparse

import pytest

- from cubids.cli import _path_exists, _is_file, _get_parser, _main
+ from cubids.cli import _get_parser, _is_file, _main, _path_exists


def _test_path_exists():
