Skip to content

Commit

Permalink
Add "hed-validator" to run validation on BIDS dataset
Browse files Browse the repository at this point in the history
Unlike other hed_ scripts I prefixed this as hed- to be more consistent
with conventions like git-COMMAND, bids-validator, etc.

I have not yet managed to trigger any errors, so I could not check how they would
look or whether they would serialize into JSON. The hed-examples datasets are all
clean, and the sample dataset I had crashes the validator (separate issue to follow).
  • Loading branch information
yarikoptic committed Oct 3, 2024
1 parent 37ac884 commit d398c29
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
76 changes: 76 additions & 0 deletions hed/scripts/hed_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import argparse
import json
import sys


def main():
    """Command-line entry point: parse the options and validate a dataset.

    Returns:
        int: 1 when validation reported issues, 0 otherwise (used as the exit status).
    """
    cli = argparse.ArgumentParser(description="Validate an HED BIDS dataset.")

    # Where the dataset lives (required, positional).
    cli.add_argument("dataset_path", help="Path to the dataset directory")

    # How the report should be rendered.
    cli.add_argument(
        "-f",
        "--format",
        choices=["text", "json", "json_pp"],
        default="text",
        help="Output format: 'text' (default) or 'json' ('json_pp' for pretty-printed json)",
    )

    # Where the report should go; the screen when omitted.
    cli.add_argument(
        "-o",
        "--output-file",
        help="File to save the output. If not provided, output is printed to the screen",
    )

    # Opt-in switch passed through to the validator.
    cli.add_argument(
        "--check-for-warnings",
        action="store_true",
        help="Enable checking for warnings during validation",
    )

    options = cli.parse_args()
    found_issues = validate_dataset(options)

    # A non-empty issue collection maps to a failing exit status.
    return 1 if found_issues else 0


def validate_dataset(args):
    """Validate the BIDS dataset named by ``args`` and emit a report.

    The report is written to ``args.output_file`` when that is set, otherwise it is
    printed. In "text" format the HEDTOOLS version (and the issue count, when there
    are issues) is always printed to the screen, even when a file is requested.

    Parameters:
        args (argparse.Namespace): Parsed options. The attributes read are
            ``dataset_path``, ``check_for_warnings``, ``format`` and ``output_file``.

    Returns:
        The issue collection returned by ``BidsDataset.validate``; falsy when the
        dataset produced no issues.

    Raises:
        ValueError: If ``args.format`` is not "text", "json" or "json_pp".
    """
    # Delayed imports to speed up --help
    from hed.errors import get_printable_issue_string
    from hed.tools import BidsDataset
    from hed import _version as vr

    # Validate the dataset
    bids = BidsDataset(args.dataset_path)
    issue_list = bids.validate(check_for_warnings=args.check_for_warnings)

    # Build the report. "json" and "json_pp" share one payload shape and differ only
    # in indentation; the former standalone "json" branch was unreachable and is gone.
    if args.format in ("json", "json_pp"):
        kw = {"indent": 4} if args.format == "json_pp" else {}
        output = json.dumps(
            {
                "issues": issue_list,
                "hedtools_version": str(vr.get_versions())
            },
            **kw)
    elif args.format == "text":
        # Print HEDTOOLS version
        print(f"Using HEDTOOLS version: {str(vr.get_versions())}")

        if issue_list:
            output = get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False)
            # Print number of issues
            print(f"Number of issues: {len(issue_list)}")
        else:
            output = "No HED validation errors"
    else:
        raise ValueError(args.format)

    # Output to file or print to screen
    if args.output_file:
        # Explicit encoding so the saved report does not depend on the platform locale.
        with open(args.output_file, 'w', encoding="utf-8") as fp:
            fp.write(output)
    else:
        print(output)
    return issue_list


# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dependencies = [
run_remodel = "hed.tools.remodeling.cli.run_remodel:main"
run_remodel_backup = "hed.tools.remodeling.cli.run_remodel_backup:main"
run_remodel_restore = "hed.tools.remodeling.cli.run_remodel_restore:main"
hed-validator = "hed.scripts.hed_validator:main"
hed_validate_schemas = "hed.scripts.validate_schemas:main"
hed_update_schemas = "hed.scripts.convert_and_update_schema:main"
hed_add_ids = "hed.scripts.add_hed_ids:main"
Expand Down

0 comments on commit d398c29

Please sign in to comment.