Replace legacy validator with schema validator #337

Merged on Dec 13, 2024 (10 commits)
6 changes: 2 additions & 4 deletions .circleci/config.yml
@@ -31,10 +31,8 @@ jobs:
source activate cubids
conda install -c conda-forge -y datalad

# Add nodejs and the validator
conda install nodejs
npm install -g yarn && \
npm install -g [email protected]
# Add deno to run the schema validator
conda install deno

# Install CuBIDS
pip install -e .[tests]
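As context for this change, here is a minimal sketch (not part of the diff) of checking from Python that the deno-hosted schema validator is reachable. It reuses the same `jsr:@bids/validator` invocation that `cubids/validator.py` builds below, and assumes deno is on the PATH, as installed by the CI step above.

import subprocess

# Ask the schema validator for its version; a successful call confirms that
# deno can resolve and run jsr:@bids/validator in this environment.
result = subprocess.run(
    ["deno", "run", "-A", "jsr:@bids/validator", "--version"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
print(result.stdout.decode("utf-8").strip())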
36 changes: 36 additions & 0 deletions cubids/cli.py
@@ -107,6 +107,41 @@ def _enter_validate(argv=None):
workflows.validate(**args)


def _parse_bids_version():
parser = argparse.ArgumentParser(
description="cubids bids-version: Get BIDS Validator and Schema version",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
PathExists = partial(_path_exists, parser=parser)

parser.add_argument(
"bids_dir",
type=PathExists,
action="store",
help=(
"the root of a BIDS dataset. It should contain "
"sub-X directories and dataset_description.json"
),
)
parser.add_argument(
"--write",
action="store_true",
default=False,
help=(
"Save the validator and schema version to 'dataset_description.json' "
"when using `cubids bids-version /bids/path --write`. "
"By default, `cubids bids-version /bids/path` prints to the terminal."
),
)
return parser


def _enter_bids_version(argv=None):
options = _parse_bids_version().parse_args(argv)
args = vars(options).copy()
workflows.bids_version(**args)


def _parse_bids_sidecar_merge():
parser = argparse.ArgumentParser(
description=("bids-sidecar-merge: merge critical keys from one sidecar to another"),
@@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None):

COMMANDS = [
("validate", _parse_validate, workflows.validate),
("bids-version", _parse_bids_version, workflows.bids_version),
("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge),
("group", _parse_group, workflows.group),
("apply", _parse_apply, workflows.apply),
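A short usage sketch for the new entry point (not part of the diff; "/data/bids" is a placeholder and must be an existing directory, because the PathExists type rejects missing paths):

from cubids.cli import _parse_bids_version

# Parse arguments the same way `cubids bids-version /data/bids --write` would.
parser = _parse_bids_version()
args = parser.parse_args(["/data/bids", "--write"])

# vars(args) holds {"bids_dir": ..., "write": True}, which _enter_bids_version
# forwards to workflows.bids_version(**args). Without --write, the versions are
# only printed to the terminal instead of being saved to dataset_description.json.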
16 changes: 13 additions & 3 deletions cubids/cubids.py
@@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self):
found_fields = set()
for json_file in Path(self.path).rglob("*.json"):
if ".git" not in str(json_file):
with open(json_file, "r") as jsonr:
metadata = json.load(jsonr)
found_fields.update(metadata.keys())
# Handle empty or invalid JSON, e.g., if `print-metadata-fields` is run before `validate`
try:
with open(json_file, "r", encoding="utf-8") as jsonr:
content = jsonr.read().strip()
if not content:
print(f"Empty file: {json_file}")
continue
metadata = json.loads(content)
found_fields.update(metadata.keys())
except json.JSONDecodeError as e:
print(f"Error decoding JSON in {json_file}: {e}")
except Exception as e:
print(f"Unexpected error with file {json_file}: {e}")
return sorted(found_fields)

def remove_metadata_fields(self, fields_to_remove):
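For illustration, a standalone sketch of the defensive pattern added above, showing why an empty or malformed sidecar no longer aborts the metadata scan (the helper name and file path are hypothetical):

import json

def sidecar_keys(json_path):
    # Return top-level sidecar keys, or an empty set if the file is unusable.
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            content = f.read().strip()
        if not content:
            print(f"Empty file: {json_path}")
            return set()
        return set(json.loads(content).keys())
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON in {json_path}: {e}")
        return set()

# sidecar_keys("sub-01/anat/sub-01_T1w.json") might return {"EchoTime", "RepetitionTime"}.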
3 changes: 2 additions & 1 deletion cubids/tests/test_cli.py
@@ -14,9 +14,10 @@
"""

import argparse

import pytest

from cubids.cli import _path_exists, _is_file, _get_parser, _main
from cubids.cli import _get_parser, _is_file, _main, _path_exists


def _test_path_exists():
228 changes: 167 additions & 61 deletions cubids/validator.py
@@ -5,6 +5,7 @@
import logging
import os
import pathlib
import re
import subprocess

import pandas as pd
@@ -14,16 +15,33 @@

def build_validator_call(path, ignore_headers=False):
"""Build a subprocess command to the bids validator."""
# build docker call
# CuBIDS automatically ignores subject consistency.
command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"]
# The new schema-based BIDS validator doesn't have an option to ignore subject consistency.
# Build the deno command to run the BIDS validator.
command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"]

if ignore_headers:
command.append("--ignoreNiftiHeaders")

return command


def get_bids_validator_version():
"""Get the version of the BIDS validator.

Returns
-------
:obj:`dict`
Dictionary containing the validator version, stored under the ``ValidatorVersion`` key.
"""
command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"]
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output = result.stdout.decode("utf-8").strip()
version = output.split()[-1]
# Remove ANSI color codes
clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version)
return {"ValidatorVersion": clean_ver}


def build_subject_paths(bids_dir):
"""Build a list of BIDS dirs with 1 subject each."""
bids_dir = str(bids_dir)
@@ -52,6 +70,26 @@ def build_subject_paths(bids_dir):
return subjects_dict


def build_first_subject_path(bids_dir, subject):
"""Build a list of BIDS dirs with 1 subject each."""
bids_dir = str(bids_dir)
if not bids_dir.endswith("/"):
bids_dir += "/"

root_files = [x for x in glob.glob(bids_dir + "*") if os.path.isfile(x)]

subject_dict = {}

purepath = pathlib.PurePath(subject)
sub_label = purepath.name

files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)]
files.extend(root_files)
subject_dict[sub_label] = files

return subject_dict


def run_validator(call):
"""Run the validator with subprocess.

@@ -87,32 +125,6 @@ def parse_validator_output(output):
Dataframe of validator output.
"""

def get_nested(dct, *keys):
"""Get a nested value from a dictionary.

Parameters
----------
dct : :obj:`dict`
Dictionary to get value from.
keys : :obj:`list`
List of keys to get value from.

Returns
-------
:obj:`dict`
The nested value.
"""
for key in keys:
try:
dct = dct[key]
except (KeyError, TypeError):
return None
return dct

data = json.loads(output)

issues = data["issues"]

def parse_issue(issue_dict):
"""Parse a single issue from the validator output.

@@ -126,30 +138,30 @@ def parse_issue(issue_dict):
return_dict : :obj:`dict`
Dictionary of parsed issue.
"""
return_dict = {}
return_dict["files"] = [
get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "")
]
return_dict["type"] = issue_dict.get("key", "")
return_dict["severity"] = issue_dict.get("severity", "")
return_dict["description"] = issue_dict.get("reason", "")
return_dict["code"] = issue_dict.get("code", "")
return_dict["url"] = issue_dict.get("helpUrl", "")

return return_dict

df = pd.DataFrame()

for warn in issues["warnings"]:
parsed = parse_issue(warn)
parsed = pd.DataFrame(parsed)
df = pd.concat([df, parsed], ignore_index=True)

for err in issues["errors"]:
parsed = parse_issue(err)
parsed = pd.DataFrame(parsed)
df = pd.concat([df, parsed], ignore_index=True)
return {
"location": issue_dict.get("location", ""),
"code": issue_dict.get("code", ""),
"issueMessage": issue_dict.get("issueMessage", ""),
"subCode": issue_dict.get("subCode", ""),
"severity": issue_dict.get("severity", ""),
"rule": issue_dict.get("rule", ""),
}

# Load JSON data
data = json.loads(output)

# Extract issues
issues = data.get("issues", {}).get("issues", [])
if not issues:
return pd.DataFrame(
columns=["location", "code", "issueMessage", "subCode", "severity", "rule"]
)

# Parse all issues
parsed_issues = [parse_issue(issue) for issue in issues]

# Convert to DataFrame
df = pd.DataFrame(parsed_issues)
return df


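As an aside, a minimal sketch of what the rewritten parser consumes and produces; the JSON below is a hand-written stand-in for real validator output, assuming the nested issues.issues layout handled above:

import json

from cubids.validator import parse_validator_output

# Hand-crafted stand-in for the schema validator's JSON output.
fake_output = json.dumps(
    {
        "issues": {
            "issues": [
                {
                    "location": "/sub-01/anat/sub-01_T1w.nii.gz",
                    "code": "NIFTI_HEADER_UNREADABLE",
                    "issueMessage": "NIfTI header could not be parsed.",
                    "subCode": "",
                    "severity": "error",
                    "rule": "",
                }
            ]
        }
    }
)

df = parse_validator_output(fake_output)
print(df.columns.tolist())
# ['location', 'code', 'issueMessage', 'subCode', 'severity', 'rule']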
@@ -161,12 +173,106 @@ def get_val_dictionary():
val_dict : dict
Dictionary of values.
"""
val_dict = {}
val_dict["files"] = {"Description": "File with warning orerror"}
val_dict["type"] = {"Description": "BIDS validation warning or error"}
val_dict["severity"] = {"Description": "gravity of problem (warning/error"}
val_dict["description"] = {"Description": "Description of warning/error"}
val_dict["code"] = {"Description": "BIDS validator issue code number"}
val_dict["url"] = {"Description": "Link to the issue's neurostars thread"}

return val_dict
return {
"location": {"Description": "File with the validation issue."},
"code": {"Description": "Code of the validation issue."},
"issueMessage": {"Description": "Validation issue message."},
"subCode": {"Description": "Subcode providing additional issue details."},
"severity": {"Description": "Severity of the issue (e.g., warning, error)."},
"rule": {"Description": "Validation rule that triggered the issue."},
}


def extract_summary_info(output):
"""Extract summary information from the JSON output.

Parameters
----------
output : str
JSON string of BIDS validator output.

Returns
-------
dict
Dictionary containing SchemaVersion and other summary info.
"""
try:
data = json.loads(output)
except json.JSONDecodeError as e:
raise ValueError("Invalid JSON provided to get SchemaVersion.") from e

summary = data.get("summary", {})

return {"SchemaVersion": summary.get("schemaVersion", "")}


def update_dataset_description(path, new_info):
"""Update or append information to dataset_description.json.

Parameters
----------
path : :obj:`str`
Path to the dataset.
new_info : :obj:`dict`
Information to add or update.
"""
description_path = os.path.join(path, "dataset_description.json")

# Load existing data if the file exists
if os.path.exists(description_path):
with open(description_path, "r") as f:
existing_data = json.load(f)
else:
existing_data = {}

# Update the existing data with the new info
existing_data.update(new_info)

# Write the updated data back to the file
with open(description_path, "w") as f:
json.dump(existing_data, f, indent=4)
print(f"Updated dataset_description.json at: {description_path}")

# Check if .datalad directory exists before running the DataLad save command
datalad_dir = os.path.join(path, ".datalad")
if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir):
try:
subprocess.run(
[
"datalad",
"save",
"-m",
"Save BIDS validator and schema version to dataset_description",
description_path,
],
check=True,
)
print("Changes saved with DataLad.")
except subprocess.CalledProcessError as e:
print(f"Error running DataLad save: {e}")


def bids_validator_version(output, path, write=False):
"""Save BIDS validator and schema version.

Parameters
----------
output : :obj:`str`
JSON string of BIDS validator output.
path : :obj:`str`
Path to the dataset.
write : :obj:`bool`
If True, write to dataset_description.json. If False, print to terminal.
"""
# Get the BIDS validator version
validator_version = get_bids_validator_version()
# Extract schemaVersion
summary_info = extract_summary_info(output)

combined_info = {**validator_version, **summary_info}

if write:
# Update the dataset_description.json file
update_dataset_description(path, combined_info)
elif not write:
print(combined_info)
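Tying the new helpers together, a rough end-to-end sketch (the dataset path is a placeholder; this mirrors how the workflow layer is expected to chain these functions and assumes run_validator returns the completed subprocess with captured stdout, as in the existing CuBIDS code):

from cubids.validator import (
    bids_validator_version,
    build_validator_call,
    parse_validator_output,
    run_validator,
)

bids_dir = "/data/bids"  # placeholder path to a BIDS dataset
proc = run_validator(build_validator_call(bids_dir))
output = proc.stdout.decode("utf-8")

# DataFrame of validation issues, one row per issue.
issues = parse_validator_output(output)

# Save ValidatorVersion and SchemaVersion into dataset_description.json.
bids_validator_version(output, bids_dir, write=True)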