Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace legacy validator with schema validator #337

Merged
merged 10 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
version: 2.1
orbs:
codecov: codecov/codecov@1.0.5
codecov: codecov/codecov@3.2.4

jobs:
run_pytests:
Expand Down Expand Up @@ -31,10 +31,8 @@ jobs:
source activate cubids
conda install -c conda-forge -y datalad

# Add nodejs and the validator
conda install nodejs
npm install -g yarn && \
npm install -g bids-validator@<version>  # NOTE: exact pinned version obscured by page scraping ("[email protected]")
# Add deno to run the schema validator
conda install deno
tsalo marked this conversation as resolved.
Show resolved Hide resolved

# Install CuBIDS
pip install -e .[tests]
Expand Down Expand Up @@ -64,7 +62,8 @@ jobs:

# We need curl for the codecov upload
apt-get update
apt-get install -yqq curl
apt-get install -y -qq curl
apt-get install -y gnupg

cd /home/circleci/src/coverage/
echo "Merge coverage files"
Expand Down
50 changes: 43 additions & 7 deletions cubids/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _parse_validate():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -107,6 +107,41 @@ def _enter_validate(argv=None):
workflows.validate(**args)


def _parse_bids_version():
    """Build the argument parser for the ``cubids bids-version`` command.

    Returns
    -------
    argparse.ArgumentParser
        Parser that accepts a BIDS dataset root and an optional ``--write``
        flag controlling whether the validator/schema versions are written
        to ``dataset_description.json`` or just printed.
    """
    parser = argparse.ArgumentParser(
        description="cubids bids-version: Get BIDS Validator and Schema version",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Validate that the positional path exists before the workflow runs.
    PathExists = partial(_path_exists, parser=parser)

    parser.add_argument(
        "bids_dir",
        type=PathExists,
        action="store",
        help=(
            "The root of a BIDS dataset. It should contain "
            "sub-X directories and dataset_description.json"
        ),
    )
    parser.add_argument(
        "--write",
        action="store_true",
        default=False,
        help=(
            "Save the validator and schema version to 'dataset_description.json' "
            "when using `cubids bids-version /bids/path --write`. "
            "By default, `cubids bids-version /bids/path` prints to the terminal."
        ),
    )
    return parser


def _enter_bids_version(argv=None):
    """Entry point for ``cubids bids-version``: parse args and run the workflow."""
    parsed = _parse_bids_version().parse_args(argv)
    # Hand the parsed namespace to the workflow as keyword arguments.
    workflows.bids_version(**vars(parsed).copy())


def _parse_bids_sidecar_merge():
parser = argparse.ArgumentParser(
description=("bids-sidecar-merge: merge critical keys from one sidecar to another"),
Expand Down Expand Up @@ -153,7 +188,7 @@ def _parse_group():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -220,7 +255,7 @@ def _parse_apply():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -316,7 +351,7 @@ def _parse_datalad_save():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -358,7 +393,7 @@ def _parse_undo():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -582,7 +617,7 @@ def _parse_remove_metadata_fields():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand Down Expand Up @@ -628,7 +663,7 @@ def _parse_print_metadata_fields():
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"The root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
Expand All @@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None):

COMMANDS = [
("validate", _parse_validate, workflows.validate),
("bids-version", _parse_bids_version, workflows.bids_version),
("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge),
("group", _parse_group, workflows.group),
("apply", _parse_apply, workflows.apply),
Expand Down
17 changes: 14 additions & 3 deletions cubids/cubids.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,9 +1336,20 @@ def get_all_metadata_fields(self):
found_fields = set()
for json_file in Path(self.path).rglob("*.json"):
if ".git" not in str(json_file):
with open(json_file, "r") as jsonr:
metadata = json.load(jsonr)
found_fields.update(metadata.keys())
# add this in case `print-metadata-fields` is run before validate
try:
with open(json_file, "r", encoding="utf-8") as jsonr:
content = jsonr.read().strip()
if not content:
print(f"Empty file: {json_file}")
continue
metadata = json.loads(content)
found_fields.update(metadata.keys())
except json.JSONDecodeError as e:
warnings.warn(f"Error decoding JSON in {json_file}: {e}")
except Exception as e:
warnings.warn(f"Unexpected error with file {json_file}: {e}")

return sorted(found_fields)

def remove_metadata_fields(self, fields_to_remove):
Expand Down
44 changes: 43 additions & 1 deletion cubids/tests/test_bond.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
import pandas as pd
import pytest
from packaging.version import Version

from cubids.cubids import CuBIDS
from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite
Expand All @@ -22,7 +23,15 @@
file_hash,
get_data,
)
from cubids.validator import build_validator_call, parse_validator_output, run_validator
from cubids.validator import (
build_validator_call,
parse_validator_output,
run_validator,
get_bids_validator_version,
extract_summary_info,
update_dataset_description,
bids_validator_version,
)

COMPLETE_KEY_GROUPS = [
"acquisition-HASC55AP_datatype-dwi_suffix-dwi",
Expand Down Expand Up @@ -1028,6 +1037,39 @@ def test_validator(tmp_path):
assert isinstance(parsed, pd.DataFrame)


def test_bids_version(tmp_path):
    """Test workflows.bids_version."""
    bids_dir = Path(get_data(tmp_path)) / "complete"

    # Ensure the test directory exists
    assert bids_dir.exists()

    # Run the schema validator against the intact dataset.
    proc = run_validator(build_validator_call(bids_dir))
    assert proc.returncode == 0

    stdout_text = proc.stdout.decode("UTF-8")

    # Version reported by the validator binary itself.
    validator_version = Version(get_bids_validator_version()["ValidatorVersion"])
    # Schema version extracted from the validator's output summary.
    schema_version = Version(extract_summary_info(stdout_text)["SchemaVersion"])

    # Baseline versions this test requires.
    min_validator_version = Version("2.0.0")
    min_schema_version = Version("0.11.3")

    assert (
        validator_version >= min_validator_version
    ), f"Validator version {validator_version} is less than minimum {min_validator_version}"
    assert (
        schema_version >= min_schema_version
    ), f"Schema version {schema_version} is less than minimum {min_schema_version}"


def test_docker():
"""Verify that docker is installed and the user has permission to run docker images.

Expand Down
3 changes: 2 additions & 1 deletion cubids/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@
"""

import argparse

import pytest

from cubids.cli import _path_exists, _is_file, _get_parser, _main
from cubids.cli import _get_parser, _is_file, _main, _path_exists


def _test_path_exists():
Expand Down
Loading
Loading