From 9dd6cea670849dde86f01a7850706849d5ef8543 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 10 Oct 2023 15:24:54 +0200 Subject: [PATCH 01/20] add remodeler validator --- hed/tools/remodeling/cli/run_remodel.py | 6 +- .../resources/remodeler_schema.json | 954 ++++++++++++++++++ hed/tools/remodeling/validator.py | 87 ++ requirements.txt | 3 +- .../remodel_tests/all_remodel_operations.json | 284 ++++++ tests/tools/remodeling/test_validator.py | 98 ++ 6 files changed, 1429 insertions(+), 3 deletions(-) create mode 100644 hed/tools/remodeling/resources/remodeler_schema.json create mode 100644 hed/tools/remodeling/validator.py create mode 100644 tests/data/remodel_tests/all_remodel_operations.json create mode 100644 tests/tools/remodeling/test_validator.py diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 45fce71ae..1ba604035 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -6,6 +6,7 @@ from hed.errors.exceptions import HedFileError from hed.tools.util.io_util import get_file_list, get_task_from_file from hed.tools.bids.bids_dataset import BidsDataset +from hed.tools.remodeling.validator import RemodelerValidator from hed.tools.remodeling.dispatcher import Dispatcher from hed.tools.remodeling.backup_manager import BackupManager @@ -105,10 +106,11 @@ def parse_arguments(arg_list=None): print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}") with open(args.model_path, 'r') as fp: operations = json.load(fp) - parsed_operations, errors = Dispatcher.parse_operations(operations) + validator = RemodelerValidator() + errors = validator.validate(operations) if errors: raise ValueError("UnableToFullyParseOperations", - f"Fatal operation error, cannot continue:\n{Dispatcher.errors_to_str(errors)}") + f"Fatal operation error, cannot continue:\n{validator.error_to_str(errors, operations)}") return args, operations diff --git a/hed/tools/remodeling/resources/remodeler_schema.json b/hed/tools/remodeling/resources/remodeler_schema.json new file mode 100644 index 000000000..e44a25eca --- /dev/null +++ b/hed/tools/remodeling/resources/remodeler_schema.json @@ -0,0 +1,954 @@ +{ + "type": "array", + "items": { + "type": "object", + "required": [ + "operation", + "description", + "parameters" + ], + "additionalProperties": false, + "properties": { + "operation": { + "type": "string", + "enum": [ + "convert_columns", + "factor_column", + "factor_hed_tags", + "factor_hed_type", + "merge_consecutive", + "number_groups", + "number_rows", + "remap_columns", + "remove_columns", + "remove_rows", + "rename_columns", + "reorder_columns", + "split_rows", + "summarize_column_names", + "summarize_column_values", + "summarize_definitions", + "summarize_hed_tags", + "summarize_hed_type", + "summarize_hed_validation", + "summarize_sidecar_from_events" + ], + "default": "convert_columns" + }, + "description": { + "type": "string" + }, + "parameters": { + "type": "object", + "properties": {} + } + }, + "allOf": [ + { + "if": { + "properties": { + "operation": { + "const": "convert_column" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_names": { + "type": "array" + }, + "convert_to": { + "type": "string" + }, + "decimal_places": { + "type": "integer" + } + }, + "required": [ + "column_names", + "convert_to" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": 
{ + "const": "factor_column" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "factor_values": { + "type": "array", + "items": { + "type": "string" + } + }, + "factor_names": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "column_name", + "factor_values", + "factor_names" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "factor_hed_tags" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "queries": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "query_names": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "remove_types": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0 + }, + "expand_context": { + "type": "boolean" + } + }, + "required": [ + "queries", + "query_names", + "remove_types" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "factor_hed_type" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "type_tag": { + "type": "string" + }, + "type_values": { + "type": "array" + } + }, + "required": [ + "type_tag", + "type_values" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "merge_consecutive" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "event_code": { + "type": [ + "string", + "number" + ] + }, + "match_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "set_durations": { + "type": "boolean" + }, + "ignore_missing": { + "type": "boolean" + } + }, + "required": [ + "column_name", + "event_code", + "match_columns", + "set_durations", + "ignore_missing" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "number_groups" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "source_column": { + "type": "string" + }, + "start": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": false + }, + "stop": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": false + }, + "overwrite": { + "type": "boolean" + } + }, + "required": [ + "number_column_name", + "source_column", + "start", + "stop" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "number_rows" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "overwrite": { + "type": "boolean" + }, + "match_value": { + "type": "object", 
+ "properties": { + "column": { + "type": "string" + }, + "value": { + "type": [ + "string", + "number" + ] + } + }, + "required": [ + "column", + "value" + ], + "additionalProperties": false + } + }, + "required": [ + "number_column_name" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "remap_columns" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "source_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "destination_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "map_list": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + } + }, + "ignore_missing": { + "type": "boolean" + }, + "integer_sources": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "source_columns", + "destination_columns", + "map_list", + "ignore_missing" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "remove_columns" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_names": { + "type": "array", + "items": { + "type": "string" + } + }, + "ignore_missing": { + "type": "boolean" + } + }, + "required": [ + "column_names", + "ignore_missing" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "remove_rows" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "remove_values": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + } + }, + "required": [ + "column_name", + "remove_values" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "rename_columns" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_mapping": { + "type": "object", + "patternProperties": { + "(.*?)": { + "type": "string" + } + }, + "minProperties":1 + }, + "ignore_missing": { + "type": "boolean" + } + }, + "required": [ + "column_mapping", + "ignore_missing" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "reorder_columns" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "column_order": { + "type": "array", + "items": { + "type": "string" + } + }, + "ignore_missing": { + "type": "boolean" + }, + "keep_others": { + "type": "boolean" + } + }, + "required": [ + "column_order", + "ignore_missing", + "keep_others" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "split_rows" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "anchor_column": { + "type": "string" + }, + "new_events": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]*$": { + "type": "object", + "properties": { + "onset_source": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + }, + "duration": { + "type": "array", + "items": { + "type": [ + 
"string", + "number" + ] + } + }, + "copy_columns": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "onset_source", + "duration", + "copy_columns" + ], + "additionalProperties": false + } + }, + "minProperties": 1 + }, + "remove_parent_event": { + "type": "boolean" + } + }, + "required": [ + "remove_parent_event" + ], + "minProperties": 2, + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_column_names" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + } + }, + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_column_values" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "skip_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "value_columns": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "summary_name", + "summary_filename", + "skip_columns", + "value_columns" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_definitions" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + } + }, + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_hed_tags" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "tags": { + "type": "object", + "properties": { + "Sensory events": { + "type": "array", + "items": { + "type": "string" + } + }, + "Agent actions": { + "type": "array", + "items": { + "type": "string" + } + }, + "Objects": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "Sensory events", + "Agent actions", + "Objects" + ], + "additionalProperties": false + } + }, + "required": [ + "summary_name", + "summary_filename", + "tags" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_hed_type" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "type_tag": { + "type": "string" + } + }, + "required": [ + "summary_name", + "summary_filename", + "type_tag" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_hed_validation" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "check_for_warnings": { + "type": 
"boolean" + } + }, + "required": [ + "summary_name", + "summary_filename", + "check_for_warnings" + ], + "additionalProperties": false + } + } + } + }, + { + "if": { + "properties": { + "operation": { + "const": "summarize_hed_type" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": { + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "type_tag": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } + }, + "required": [ + "summary_name", + "summary_filename", + "type_tag" + ], + "additionalProperties": false + } + } + } + } + ] + }, + "minItems": 1 +} \ No newline at end of file diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py new file mode 100644 index 000000000..2da9ef0b6 --- /dev/null +++ b/hed/tools/remodeling/validator.py @@ -0,0 +1,87 @@ +import os +import json +from jsonschema import Draft7Validator +from jsonschema.exceptions import ErrorTree + +class RemodelerValidator(): + + def __init__(self): + with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../remodeling/resources/remodeler_schema.json')), 'r') as fp: + self.validator = Draft7Validator(json.load(fp)) + + self.message_templates = { + "0": { + "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.", + "type": "Operations must be contained in a list or array. This is also true when you run a single operation." + }, + "1": { + "type": "Each operation must be defined in a dictionary. {instance} is not a dictionary object.", + "required": "Operation dictionary {operation_index} is missing '{missing_value}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.", + "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field '{added_property}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters." + }, + "2": { + "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.", + "enum": "{instance} is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.", + "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'." + }, + "more": { + "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", + "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", + "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'." 
+ + } + } + + def validate(self, operations): + list_of_error_strings = [] + for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path): + list_of_error_strings.append(self._parse_message(error, operations)) + return list_of_error_strings + + def _parse_message(self, error, operations): + ''' Return a user friendly error message based on the jsonschema validation error + + args: + - errors: a list of errors returned from the validator + - operations: the operations that were put in + + Note: + - json schema error does not contain all necessary information to return a + proper error message so we also take some information directly from the operations + that led to the error + ''' + error_dict = vars(error) + print(error_dict) + + level = len(error_dict["path"]) + if level > 2: + level = "more" + # some information is in the validation error but not directly in a field so I need to + # modify before they can parsed in + # if they are necessary, they are there, if they are not there, they are not necessary + try: + error_dict["operation_index"] = error_dict["path"][0] + 1 + error_dict["operation_field"] = error_dict["path"][1].capitalize() + error_dict["operation_name"] = operations[int(error_dict["path"][0])]['operation'] + parameter_path = [*error_dict['path']][:1:-1] #everything except the first two values reversed + for ind, value in enumerate(parameter_path): + if isinstance(value, int): + parameter_path[ind] = f"item {value+1}" + error_dict["parameter_path"] = ", ".join(parameter_path) + except (IndexError, TypeError, KeyError): + pass + + type = str(error_dict["validator"]) + + # the missing value with required elements, or the wrong additional value is not known to the + # validation error object + # this is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119 + # for now the simplest thing seems to be to extract it from the error message + if type == 'required': + error_dict["missing_value"] = error_dict["message"].split("'")[1::2][0] + if type == 'additionalProperties': + error_dict["added_property"] = error_dict["message"].split("'")[1::2][0] + + return self.message_templates[str(level)][type].format(**error_dict) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 07c3304d7..d82b76e96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ openpyxl>=3.1.0 pandas>=1.3.5 portalocker>=2.7.0 semantic_version>=2.10.0 -wordcloud==1.9.2 \ No newline at end of file +wordcloud==1.9.2 +jsonschema==4.18.4 \ No newline at end of file diff --git a/tests/data/remodel_tests/all_remodel_operations.json b/tests/data/remodel_tests/all_remodel_operations.json new file mode 100644 index 000000000..29ec075ba --- /dev/null +++ b/tests/data/remodel_tests/all_remodel_operations.json @@ -0,0 +1,284 @@ +[ + { + "operation": "remove_columns", + "description": "Remove unwanted columns prior to analysis", + "parameters": { + "column_names": [ + "value", + "sample" + ], + "ignore_missing": true + } + }, + { + "operation": "factor_column", + "description": "Create factors for the succesful_stop and unsuccesful_stop values.", + "parameters": { + "column_name": "trial_type", + "factor_values": [ + "succesful_stop", + "unsuccesful_stop" + ], + "factor_names": [ + "stopped", + "stop_failed" + ] + } + }, + { + "operation": "factor_hed_tags", + "description": "Create factors based on whether the event represented a correct or incorrect action.", + "parameters": { + "queries": [ + "correct-action", + 
"incorrect-action" + ], + "query_names": [ + "correct", + "incorrect" + ], + "remove_types": [], + "expand_context": false + } + }, + { + "operation": "factor_hed_type", + "description": "Factor based on the sex of the images being presented.", + "parameters": { + "type_tag": "Condition-variable", + "type_values": [] + } + }, + { + "operation": "merge_consecutive", + "description": "Merge consecutive *succesful_stop* events that match the *match_columns.", + "parameters": { + "column_name": "trial_type", + "event_code": "succesful_stop", + "match_columns": [ + "stop_signal_delay", + "response_hand", + "sex" + ], + "set_durations": true, + "ignore_missing": true + } + }, + { + "operation": "remap_columns", + "description": "Map response_accuracy and response hand into a single column.", + "parameters": { + "source_columns": [ + "response_accuracy", + "response_hand" + ], + "destination_columns": [ + "response_type" + ], + "map_list": [ + [ + "correct", + "left", + "correct_left" + ], + [ + "correct", + "right", + "correct_right" + ], + [ + "incorrect", + "left", + "incorrect_left" + ], + [ + "incorrect", + "right", + "incorrect_left" + ], + [ + "n/a", + "n/a", + "n/a" + ] + ], + "ignore_missing": true + } + }, + { + "operation": "remove_columns", + "description": "Remove extra columns before the next step.", + "parameters": { + "column_names": [ + "stop_signal_delay", + "response_accuracy", + "face" + ], + "ignore_missing": true + } + }, + { + "operation": "remove_rows", + "description": "Remove rows where trial_type is either succesful_stop or unsuccesful_stop.", + "parameters": { + "column_name": "trial_type", + "remove_values": [ + "succesful_stop", + "unsuccesful_stop" + ] + } + }, + { + "operation": "rename_columns", + "description": "Rename columns to be more descriptive.", + "parameters": { + "column_mapping": { + "stop_signal_delay": "stop_delay", + "response_hand": "hand_used" + }, + "ignore_missing": true + } + }, + { + "operation": "reorder_columns", + "description": "Reorder columns.", + "parameters": { + "column_order": [ + "onset", + "duration", + "response_time", + "trial_type" + ], + "ignore_missing": true, + "keep_others": false + } + }, + { + "operation": "split_rows", + "description": "add response events to the trials.", + "parameters": { + "anchor_column": "trial_type", + "new_events": { + "response": { + "onset_source": [ + "response_time" + ], + "duration": [ + 0 + ], + "copy_columns": [ + "response_accuracy", + "response_hand", + "sex", + "trial_number" + ] + }, + "stop_signal": { + "onset_source": [ + "stop_signal_delay" + ], + "duration": [ + 0.5 + ], + "copy_columns": [ + "trial_number" + ] + } + }, + "remove_parent_event": false + } + }, + { + "operation": "summarize_column_names", + "description": "Summarize column names.", + "parameters": { + "summary_name": "AOMIC_column_names", + "summary_filename": "AOMIC_column_names" + } + }, + { + "operation": "summarize_column_values", + "description": "Summarize the column values in an excerpt.", + "parameters": { + "summary_name": "AOMIC_column_values", + "summary_filename": "AOMIC_column_values", + "skip_columns": [ + "onset", + "duration" + ], + "value_columns": [ + "response_time", + "stop_signal_delay" + ] + } + }, + { + "operation": "summarize_definitions", + "description": "Summarize the definitions used in this dataset.", + "parameters": { + "summary_name": "HED_column_definition_summary", + "summary_filename": "HED_column_definition_summary" + } + }, + { + "operation": "summarize_hed_tags", + "description": 
"Summarize the HED tags in the dataset.", + "parameters": { + "summary_name": "summarize_hed_tags", + "summary_filename": "summarize_hed_tags", + "tags": { + "Sensory events": [ + "Sensory-event", + "Sensory-presentation", + "Task-stimulus-role", + "Experimental-stimulus" + ], + "Agent actions": [ + "Agent-action", + "Agent", + "Action", + "Agent-task-role", + "Task-action-type", + "Participant-response" + ], + "Objects": [ + "Item" + ] + } + } + }, + { + "operation": "summarize_hed_type", + "description": "Summarize column names.", + "parameters": { + "summary_name": "AOMIC_condition_variables", + "summary_filename": "AOMIC_condition_variables", + "type_tag": "condition-variable" + } + }, + { + "operation": "summarize_hed_validation", + "description": "Summarize validation errors in the sample dataset.", + "parameters": { + "summary_name": "AOMIC_sample_validation", + "summary_filename": "AOMIC_sample_validation", + "check_for_warnings": true + } + }, + { + "operation": "summarize_sidecar_from_events", + "description": "Generate a sidecar from the excerpted events file.", + "parameters": { + "summary_name": "AOMIC_generate_sidecar", + "summary_filename": "AOMIC_generate_sidecar", + "skip_columns": [ + "onset", + "duration" + ], + "value_columns": [ + "response_time", + "stop_signal_delay" + ] + } + } +] \ No newline at end of file diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py new file mode 100644 index 000000000..83b10a7a5 --- /dev/null +++ b/tests/tools/remodeling/test_validator.py @@ -0,0 +1,98 @@ +import os +import json +import unittest +from copy import deepcopy +from hed.tools.remodeling.validator import RemodelerValidator +from jsonschema import Draft7Validator +from jsonschema.exceptions import SchemaError + +class Test(unittest.TestCase): + + @classmethod + def setUpClass(cls): + cls.remodeler_schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__.replace('tests', 'hed'))), '../remodeling/resources/remodeler_schema.json')) + + with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '../data/remodel_tests/all_remodel_operations.json'))) as f: + cls.remodel_file = json.load(f) + + @classmethod + def tearDownClass(cls): + pass + + def test_load_schema(self): + with open(self.remodeler_schema_path) as f: + schema = json.load(f) + Draft7Validator.check_schema(schema) + + def test_validate_valid(self): + validator = RemodelerValidator() + error_strings = validator.validate(self.remodel_file) + self.assertFalse(error_strings) + + def test_validate_array(self): + validator = RemodelerValidator() + wrong_input_type = {"operation": "remove_columns"} + error_strings = validator.validate(wrong_input_type) + self.assertEqual(error_strings[0], "Operations must be contained in a list or array. This is also true when you run a single operation.") + + no_operations = [] + error_strings = validator.validate(no_operations) + self.assertEqual(error_strings[0], "There are no operations defined. Specify at least 1 operation for the remodeler to execute.") + + def test_validate_operations(self): + validator = RemodelerValidator() + + invalid_operation_type = ["string"] + error_strings = validator.validate(invalid_operation_type) + self.assertEqual(error_strings[0], "Each operation must be defined in a dictionary. 
string is not a dictionary object.") + + invalid_operation_missing = [self.remodel_file[0].copy()] + del invalid_operation_missing[0]["description"] + error_strings = validator.validate(invalid_operation_missing) + self.assertEqual(error_strings[0], "Operation dictionary 1 is missing 'description'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.") + + invalid_operation_name = [self.remodel_file[0].copy()] + invalid_operation_name[0]["operation"] = "unlisted_operation" + error_strings = validator.validate(invalid_operation_name) + self.assertEqual(error_strings[0], "unlisted_operation is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.") + + def test_validate_parameters(self): + validator = RemodelerValidator() + + missing_parameter = [deepcopy(self.remodel_file[0])] + del missing_parameter[0]["parameters"]["column_names"] + error_strings = validator.validate(missing_parameter) + self.assertEqual(error_strings[0], "Operation 1: The parameter column_names is missing. column_names is a required parameter of remove_columns.") + + missing_parameter_nested = [deepcopy(self.remodel_file[10])] + del missing_parameter_nested[0]["parameters"]["new_events"]["response"]["onset_source"] + error_strings = validator.validate(missing_parameter_nested) + self.assertEqual(error_strings[0], "Operation 1: The field onset_source is missing in response, new_events. onset_source is a required parameter of response, new_events.") + + invalid_parameter = [deepcopy(self.remodel_file[0])] + invalid_parameter[0]["parameters"]["invalid"] = "invalid_value" + error_strings = validator.validate(invalid_parameter) + self.assertEqual(error_strings[0], "Operation 1: Operation parameters for remove_columns contain an unexpected field 'invalid'.") + + invalid_parameter_nested = [deepcopy(self.remodel_file[10])] + invalid_parameter_nested[0]["parameters"]["new_events"]["response"]["invalid"] = "invalid_value" + error_strings = validator.validate(invalid_parameter_nested) + self.assertEqual(error_strings[0], "Operation 1: Operation parameters for response, new_events contain an unexpected field 'invalid'.") + + invalid_type = [deepcopy(self.remodel_file[0])] + invalid_type[0]["parameters"]["column_names"] = 0 + error_strings = validator.validate(invalid_type) + self.assertEqual(error_strings[0], "Operation 1: The value of column_names, in the remove_columns operation, should be a array. 0 is not a array.") + + invalid_type_nested = [deepcopy(self.remodel_file[10])] + invalid_type_nested[0]["parameters"]["new_events"]["response"]["onset_source"] = {"key": "value"} + error_strings = validator.validate(invalid_type_nested) + self.assertEqual(error_strings[0], "Operation 1: The value of onset_source, response, new_events, in the split_rows operation, should be a array. 
{'key': 'value'} is not a array.") + + empty_array = [deepcopy(self.remodel_file[0])] + + empty_array_nested = [deepcopy(self.remodel_file[0])] + + + + From 402b44b1e0ea57ebe91dfe77d61f0d4c54255b83 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Sat, 14 Oct 2023 09:25:53 +0200 Subject: [PATCH 02/20] fix incorrect function call --- hed/tools/remodeling/cli/run_remodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 1ba604035..a4ee9bcaa 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -110,7 +110,7 @@ def parse_arguments(arg_list=None): errors = validator.validate(operations) if errors: raise ValueError("UnableToFullyParseOperations", - f"Fatal operation error, cannot continue:\n{validator.error_to_str(errors, operations)}") + f"Fatal operation error, cannot continue:\n{errors}") return args, operations From a0f19d873fd402343b7e18cae59e20f36605bdae Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 30 Oct 2023 12:00:32 +0100 Subject: [PATCH 03/20] replace operation parameter definitions --- .../operations/convert_columns_op.py | 23 ++- .../remodeling/operations/factor_column_op.py | 35 ++++- .../operations/factor_hed_tags_op.py | 43 +++-- .../operations/factor_hed_type_op.py | 24 ++- .../operations/merge_consecutive_op.py | 51 ++++-- .../remodeling/operations/number_groups_op.py | 86 ++++++---- .../remodeling/operations/number_rows_op.py | 53 ++++--- .../remodeling/operations/remap_columns_op.py | 63 ++++++-- .../operations/remove_columns_op.py | 21 ++- .../remodeling/operations/remove_rows_op.py | 24 ++- .../operations/rename_columns_op.py | 24 ++- .../operations/reorder_columns_op.py | 32 +++- .../remodeling/operations/split_rows_op.py | 66 +++++++- .../operations/summarize_column_names_op.py | 26 ++- .../operations/summarize_column_values_op.py | 87 +++++++--- .../operations/summarize_definitions_op.py | 44 ++++-- .../operations/summarize_hed_tags_op.py | 97 +++++++++--- .../operations/summarize_hed_type_op.py | 56 +++++-- .../operations/summarize_hed_validation_op.py | 76 ++++++--- .../summarize_sidecar_from_events_op.py | 45 ++++-- .../remodeling/operations/valid_operations.py | 12 +- .../resources/remodeler_schema.json | 44 +++++- hed/tools/remodeling/validator.py | 148 +++++++++++++----- tests/tools/remodeling/test_validator.py | 5 - 24 files changed, 876 insertions(+), 309 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index e98a8cce5..4e2f68d17 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -15,14 +15,23 @@ class ConvertColumnsOp(BaseOp): """ PARAMS = { - "operation": "convert_columns", - "required_parameters": { - "column_names": list, - "convert_to": str + "type": "object", + "properties": { + "column_names": { + "type": "array" + }, + "convert_to": { + "type": "string" + }, + "decimal_places": { + "type": "integer" + } }, - "optional_parameters": { - "decimal_places": int - } + "required": [ + "column_names", + "convert_to" + ], + "additionalProperties": False } def __init__(self, parameters): diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index e01a81d8b..3c8fbb5af 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ 
b/hed/tools/remodeling/operations/factor_column_op.py @@ -18,13 +18,30 @@ class FactorColumnOp(BaseOp): """ PARAMS = { - "operation": "factor_column", - "required_parameters": { - "column_name": str, - "factor_values": list, - "factor_names": list + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "factor_values": { + "type": "array", + "items": { + "type": "string" + } + }, + "factor_names": { + "type": "array", + "items": { + "type": "string" + } + } }, - "optional_parameters": {} + "required": [ + "column_name", + "factor_values", + "factor_names" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -71,11 +88,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): factor_names = self.factor_names if len(factor_values) == 0: factor_values = df[self.column_name].unique() - factor_names = [self.column_name + '.' + str(column_value) for column_value in factor_values] + factor_names = [self.column_name + '.' + + str(column_value) for column_value in factor_values] df_new = df.copy() for index, factor_value in enumerate(factor_values): - factor_index = df_new[self.column_name].map(str).isin([str(factor_value)]) + factor_index = df_new[self.column_name].map( + str).isin([str(factor_value)]) column = factor_names[index] df_new[column] = factor_index.astype(int) return df_new diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index c5b2ca08f..e22755db7 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -27,15 +27,39 @@ class FactorHedTagsOp(BaseOp): """ PARAMS = { - "operation": "factor_hed_tags", - "required_parameters": { - "queries": list, - "query_names": list, - "remove_types": list + "type": "object", + "properties": { + "queries": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "query_names": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1 + }, + "remove_types": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 0 + }, + "expand_context": { + "type": "boolean" + } }, - "optional_parameters": { - "expand_context": bool - } + "required": [ + "queries", + "query_names", + "remove_types" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -93,7 +117,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): event_man = EventManager(input_data, dispatcher.hed_schema) hed_strings, _ = get_assembled(input_data, sidecar, dispatcher.hed_schema, extra_def_dicts=None, join_columns=True, shrink_defs=False, expand_defs=True) - df_factors = search_strings(hed_strings, self.expression_parsers, query_names=self.query_names) + df_factors = search_strings( + hed_strings, self.expression_parsers, query_names=self.query_names) if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index df13e3631..ccf519858 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -20,12 +20,20 @@ class FactorHedTypeOp(BaseOp): """ PARAMS = { - "operation": "factor_hed_type", - "required_parameters": { - "type_tag": str, - "type_values": list + "type": "object", + "properties": { + "type_tag": { + "type": "string" + }, + "type_values": { + "type": "array" + } }, - "optional_parameters": {} + 
"required": [ + "type_tag", + "type_values" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -68,10 +76,12 @@ def do_op(self, dispatcher, df, name, sidecar=None): input_data = TabularInput(df, sidecar=sidecar, name=name) df_list = [input_data.dataframe.copy()] - var_manager = HedTypeManager(EventManager(input_data, dispatcher.hed_schema)) + var_manager = HedTypeManager( + EventManager(input_data, dispatcher.hed_schema)) var_manager.add_type(self.type_tag.lower()) - df_factors = var_manager.get_factor_vectors(self.type_tag, self.type_values, factor_encoding="one-hot") + df_factors = var_manager.get_factor_vectors( + self.type_tag, self.type_values, factor_encoding="one-hot") if len(df_factors.columns) > 0: df_list.append(df_factors) df_new = pd.concat(df_list, axis=1) diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 9ce7a16d7..7ad0f9a75 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -16,15 +16,38 @@ class MergeConsecutiveOp(BaseOp): """ PARAMS = { - "operation": "merge_consecutive", - "required_parameters": { - "column_name": str, - "event_code": [str, int, float], - "match_columns": list, - "set_durations": bool, - "ignore_missing": bool + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "event_code": { + "type": [ + "string", + "number" + ] + }, + "match_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "set_durations": { + "type": "boolean" + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_name", + "event_code", + "match_columns", + "set_durations", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -90,7 +113,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("MissingMatchColumns", f"{name}: {str(missing)} columns are unmatched by data columns" f"[{str(df.columns)}] and not ignored") - match_columns = list(set(self.match_columns).intersection(set(df.columns))) + match_columns = list( + set(self.match_columns).intersection(set(df.columns))) df_new = df.copy() code_mask = df_new[self.column_name] == self.event_code @@ -140,8 +164,11 @@ def _update_durations(df_new, remove_groups): remove_df = pd.DataFrame(remove_groups, columns=["remove"]) max_groups = max(remove_groups) for index in range(max_groups): - df_group = df_new.loc[remove_df["remove"] == index + 1, ["onset", "duration"]] + df_group = df_new.loc[remove_df["remove"] + == index + 1, ["onset", "duration"]] max_group = df_group.sum(axis=1, skipna=True).max() anchor = df_group.index[0] - 1 - max_anchor = df_new.loc[anchor, ["onset", "duration"]].sum(skipna=True).max() - df_new.loc[anchor, "duration"] = max(max_group, max_anchor) - df_new.loc[anchor, "onset"] + max_anchor = df_new.loc[anchor, [ + "onset", "duration"]].sum(skipna=True).max() + df_new.loc[anchor, "duration"] = max( + max_group, max_anchor) - df_new.loc[anchor, "onset"] diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index d3a5467db..fcc55f6dd 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -10,14 +10,65 @@ class NumberGroupsOp(BaseOp): """ Implementation in progress. 
""" PARAMS = { - "operation": "number_groups", - "required_parameters": { - "number_column_name": str, - "source_column": str, - "start": dict, - "stop": dict + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "source_column": { + "type": "string" + }, + "start": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": False + }, + "stop": { + "type": "object", + "properties": { + "values": { + "type": "array" + }, + "inclusion": { + "type": "string", + "enum": [ + "include", + "exclude" + ] + } + }, + "required": [ + "values", + "inclusion" + ], + "additionalProperties": False + }, + "overwrite": { + "type": "boolean" + } }, - "optional_parameters": {"overwrite": bool} + "required": [ + "number_column_name", + "source_column", + "start", + "stop" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -28,27 +79,6 @@ def __init__(self, parameters): self.stop = parameters['stop'] self.start_stop_test = {"values": list, "inclusion": str} self.inclusion_test = ["include", "exclude"] - - required = set(self.start_stop_test.keys()) - for param_to_test in [self.start, self.stop]: - required_missing = required.difference(set(param_to_test.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"Specified {param_to_test} for number_rows requires parameters" - f"{list(required_missing)}") - for param_name, param_value in param_to_test.items(): - param_type = str - if param_name in required: - param_type = self.start_stop_test[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - # TODO: This has a syntax error - # if not isinstance(param_value, param_type): - # raise TypeError("BadType" f"{param_value} has type {type(param_value)} not {param_type}") - if (param_name == 'inclusion') & (param_value not in self.inclusion_test): - raise ValueError("BadValue" f" {param_name} must be one of {self.inclusion_test} not {param_value}") - self.overwrite = parameters.get('overwrite', False) def do_op(self, dispatcher, df, name, sidecar=None): diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index e37b180fb..e79d77159 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -8,11 +8,38 @@ class NumberRowsOp(BaseOp): """ Implementation in progress. 
""" PARAMS = { - "operation": "number_rows", - "required_parameters": { - "number_column_name": str + "type": "object", + "properties": { + "number_column_name": { + "type": "string" + }, + "overwrite": { + "type": "boolean" + }, + "match_value": { + "type": "object", + "properties": { + "column": { + "type": "string" + }, + "value": { + "type": [ + "string", + "number" + ] + } + }, + "required": [ + "column", + "value" + ], + "additionalProperties": False + } }, - "optional_parameters": {"overwrite": bool, "match_value": dict} + "required": [ + "number_column_name" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -20,23 +47,7 @@ def __init__(self, parameters): self.number_column_name = parameters['number_column_name'] self.overwrite = parameters.get('overwrite', False) self.match_value = parameters.get('match_value', False) - if self.match_value: - self.match_value_params = {"column": str, "value": str} - required = set(self.match_value_params.keys()) - required_missing = required.difference(set(self.match_value.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"Specified match_value for number_rows requires parameters {list(required_missing)}") - for param_name, param_value in self.match_value.items(): - if param_name in required: - param_type = self.match_value_params[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - # TODO: this has a syntax error - # if not isinstance(param_value, param_type): - # raise TypeError("BadType" f"{param_value} has type {type(param_value)} not {param_type}") - + def do_op(self, dispatcher, df, name, sidecar=None): """ Add numbers events dataframe. diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index c83315795..a77e98859 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -26,16 +26,49 @@ class RemapColumnsOp(BaseOp): """ PARAMS = { - "operation": "remap_columns", - "required_parameters": { - "source_columns": list, - "destination_columns": list, - "map_list": list, - "ignore_missing": bool + "type": "object", + "properties": { + "source_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "destination_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "map_list": { + "type": "array", + "items": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + } + }, + "ignore_missing": { + "type": "boolean" + }, + "integer_sources": { + "type": "array", + "items": { + "type": "string" + } + } }, - "optional_parameters": { - "integer_sources": list - } + "required": [ + "source_columns", + "destination_columns", + "map_list", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -68,7 +101,8 @@ def __init__(self, parameters): if not set(self.integer_sources).issubset(set(self.source_columns)): raise ValueError("IntegerSourceColumnsInvalid", f"Integer courses {str(self.integer_sources)} must be in {str(self.source_columns)}") - self.string_sources = list(set(self.source_columns).difference(set(self.integer_sources))) + self.string_sources = list( + set(self.source_columns).difference(set(self.integer_sources))) self.destination_columns = parameters['destination_columns'] self.map_list = parameters['map_list'] self.ignore_missing = parameters['ignore_missing'] @@ -88,8 +122,10 @@ def 
__init__(self, parameters): self.key_map = self._make_key_map() def _make_key_map(self): - key_df = pd.DataFrame(self.map_list, columns=self.source_columns+self.destination_columns) - key_map = KeyMap(self.source_columns, target_cols=self.destination_columns, name="remap") + key_df = pd.DataFrame( + self.map_list, columns=self.source_columns+self.destination_columns) + key_map = KeyMap(self.source_columns, + target_cols=self.destination_columns, name="remap") key_map.update(key_df) return key_map @@ -110,7 +146,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ df1 = df.copy() - df1[self.source_columns] = df1[self.source_columns].replace(np.NaN, 'n/a') + df1[self.source_columns] = df1[self.source_columns].replace( + np.NaN, 'n/a') for column in self.integer_sources: int_mask = df1[column] != 'n/a' df1.loc[int_mask, column] = df1.loc[int_mask, column].astype(int) diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 267a7039a..8c98c9a81 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -12,12 +12,23 @@ class RemoveColumnsOp(BaseOp): """ PARAMS = { - "operation": "remove_columns", - "required_parameters": { - "column_names": list, - "ignore_missing": bool + "type": "object", + "properties": { + "column_names": { + "type": "array", + "items": { + "type": "string" + } + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_names", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 217fb7934..9654aea8b 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -13,12 +13,26 @@ class RemoveRowsOp(BaseOp): """ PARAMS = { - "operation": "remove_rows", - "required_parameters": { - "column_name": str, - "remove_values": list + "type": "object", + "properties": { + "column_name": { + "type": "string" + }, + "remove_values": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + } }, - "optional_parameters": {} + "required": [ + "column_name", + "remove_values" + ], + "additionalProperties": False } def __init__(self, parameters): diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 2a2f275a9..023339cd2 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -13,12 +13,26 @@ class RenameColumnsOp (BaseOp): """ PARAMS = { - "operation": "rename_columns", - "required_parameters": { - "column_mapping": dict, - "ignore_missing": bool + "type": "object", + "properties": { + "column_mapping": { + "type": "object", + "patternProperties": { + "(.*?)": { + "type": "string" + } + }, + "minProperties": 1 + }, + "ignore_missing": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_mapping", + "ignore_missing" + ], + "additionalProperties": False } def __init__(self, parameters): diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 91fcfcc30..983450c11 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -13,13 +13,27 @@ class 
ReorderColumnsOp(BaseOp): """ PARAMS = { - "operation": "reorder_columns", - "required_parameters": { - "column_order": list, - "ignore_missing": bool, - "keep_others": bool + "type": "object", + "properties": { + "column_order": { + "type": "array", + "items": { + "type": "string" + } + }, + "ignore_missing": { + "type": "boolean" + }, + "keep_others": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "column_order", + "ignore_missing", + "keep_others" + ], + "additionalProperties": False } def __init__(self, parameters): @@ -59,14 +73,16 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ df_new = df.copy() current_columns = list(df_new.columns) - missing_columns = set(self.column_order).difference(set(df_new.columns)) + missing_columns = set(self.column_order).difference( + set(df_new.columns)) ordered = self.column_order if missing_columns and not self.ignore_missing: raise ValueError("MissingReorderedColumns", f"{str(missing_columns)} are not in dataframe columns " f" [{str(df_new.columns)}] and not ignored.") elif missing_columns: - ordered = [elem for elem in self.column_order if elem not in list(missing_columns)] + ordered = [ + elem for elem in self.column_order if elem not in list(missing_columns)] if self.keep_others: ordered += [elem for elem in current_columns if elem not in ordered] df_new = df_new.loc[:, ordered] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index ea0b5dc13..36f96d098 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -16,13 +16,61 @@ class SplitRowsOp(BaseOp): """ PARAMS = { - "operation": "split_rows", - "required_parameters": { - "anchor_column": str, - "new_events": dict, - "remove_parent_row": bool + "type": "object", + "properties": { + "anchor_column": { + "type": "string" + }, + "new_events": { + "type": "object", + "patternProperties": { + "^[a-zA-Z0-9_]*$": { + "type": "object", + "properties": { + "onset_source": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + }, + "duration": { + "type": "array", + "items": { + "type": [ + "string", + "number" + ] + } + }, + "copy_columns": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "onset_source", + "duration", + "copy_columns" + ], + "additionalProperties": False + } + }, + "minProperties": 1 + }, + "remove_parent_event": { + "type": "boolean" + } }, - "optional_parameters": {} + "required": [ + "remove_parent_event" + ], + "minProperties": 2, + "additionalProperties": False } def __init__(self, parameters): @@ -85,7 +133,8 @@ def _split_rows(self, df, df_list): """ for event, event_parms in self.new_events.items(): add_events = pd.DataFrame([], columns=df.columns) - add_events['onset'] = self._create_onsets(df, event_parms['onset_source']) + add_events['onset'] = self._create_onsets( + df, event_parms['onset_source']) add_events[self.anchor_column] = event self._add_durations(df, add_events, event_parms['duration']) if len(event_parms['copy_columns']) > 0: @@ -103,7 +152,8 @@ def _add_durations(df, add_events, duration_sources): if isinstance(duration, float) or isinstance(duration, int): add_events['duration'] = add_events['duration'].add(duration) elif isinstance(duration, str) and duration in list(df.columns): - add_events['duration'] = add_events['duration'].add(pd.to_numeric(df[duration], errors='coerce')) + add_events['duration'] = add_events['duration'].add( + 
pd.to_numeric(df[duration], errors='coerce')) else: raise TypeError("BadDurationInModel", f"Remodeling duration {str(duration)} must either be numeric or a column name", "") diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 5770a6185..31752993b 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -17,14 +17,23 @@ class SummarizeColumnNamesOp(BaseOp): """ PARAMS = { - "operation": "summarize_column_names", - "required_parameters": { - "summary_name": str, - "summary_filename": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = "column_names" @@ -69,7 +78,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = ColumnNamesSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({"name": name, "column_names": list(df_new.columns)}) + summary.update_summary( + {"name": name, "column_names": list(df_new.columns)}) return df_new diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 4c4401881..275b68d19 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -22,18 +22,43 @@ class SummarizeColumnValuesOp(BaseOp): """ PARAMS = { - "operation": "summarize_column_values", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "skip_columns": list, - "value_columns": list + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "skip_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "value_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "append_timecode": { + "type": "boolean" + }, + "max_categorical": { + "type": "integer" + }, + "values_per_line": { + "type": "integer" + } }, - "optional_parameters": { - "append_timecode": bool, - "max_categorical": int, - "values_per_line": int - } + "required": [ + "summary_name", + "summary_filename", + "skip_columns", + "value_columns" + ], + "additionalProperties": False } SUMMARY_TYPE = 'column_values' @@ -62,7 +87,8 @@ def __init__(self, parameters): self.value_columns = parameters['value_columns'] self.append_timecode = parameters.get('append_timecode', False) self.max_categorical = parameters.get('max_categorical', float('inf')) - self.values_per_line = parameters.get('values_per_line', self.VALUES_PER_LINE) + self.values_per_line = parameters.get( + 'values_per_line', self.VALUES_PER_LINE) def do_op(self, dispatcher, df, name, sidecar=None): """ Create a summary of the column values in df. 
@@ -86,7 +112,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = ColumnValueSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name}) + summary.update_summary( + {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new @@ -109,7 +136,8 @@ def update_summary(self, new_info): name = new_info['name'] if name not in self.summary_dict: self.summary_dict[name] = \ - TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name=name) + TabularSummary(value_cols=self.op.value_columns, + skip_cols=self.op.skip_columns, name=name) self.summary_dict[name].update(new_info['df']) def get_details_dict(self, summary): @@ -123,11 +151,14 @@ def get_details_dict(self, summary): """ this_summary = summary.get_summary(as_json=False) - unique_counts = [(key, len(count_dict)) for key, count_dict in this_summary['Categorical columns'].items()] + unique_counts = [(key, len(count_dict)) for key, + count_dict in this_summary['Categorical columns'].items()] this_summary['Categorical counts'] = dict(unique_counts) for key, dict_entry in this_summary['Categorical columns'].items(): - num_disp, sorted_tuples = ColumnValueSummary.sort_dict(dict_entry, reverse=True) - this_summary['Categorical columns'][key] = dict(sorted_tuples[:min(num_disp, self.op.max_categorical)]) + num_disp, sorted_tuples = ColumnValueSummary.sort_dict( + dict_entry, reverse=True) + this_summary['Categorical columns'][key] = dict( + sorted_tuples[:min(num_disp, self.op.max_categorical)]) return {"Name": this_summary['Name'], "Total events": this_summary["Total events"], "Total files": this_summary['Total files'], "Files": list(this_summary['Files'].keys()), @@ -144,7 +175,8 @@ def merge_all_info(self): TabularSummary - the summary object for column values. 
""" - all_sum = TabularSummary(value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name='Dataset') + all_sum = TabularSummary( + value_cols=self.op.value_columns, skip_cols=self.op.skip_columns, name='Dataset') for counts in self.summary_dict.values(): all_sum.update_summary(counts) return all_sum @@ -190,10 +222,13 @@ def _get_categorical_string(self, result, offset="", indent=" "): if not cat_dict: return "" count_dict = result['Categorical counts'] - sum_list = [f"{offset}{indent}Categorical column values[Events, Files]:"] + sum_list = [ + f"{offset}{indent}Categorical column values[Events, Files]:"] sorted_tuples = sorted(cat_dict.items(), key=lambda x: x[0]) for entry in sorted_tuples: - sum_list = sum_list + self._get_categorical_col(entry, count_dict, offset="", indent=" ") + sum_list = sum_list + \ + self._get_categorical_col( + entry, count_dict, offset="", indent=" ") return "\n".join(sum_list) def _get_detail_list(self, result, indent=BaseSummary.DISPLAY_INDENT): @@ -209,12 +244,14 @@ def _get_detail_list(self, result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [] specifics = result["Specifics"] - cat_string = self._get_categorical_string(specifics, offset="", indent=indent) + cat_string = self._get_categorical_string( + specifics, offset="", indent=indent) if cat_string: sum_list.append(cat_string) val_dict = specifics.get("Value column summaries", {}) if val_dict: - sum_list.append(ColumnValueSummary._get_value_string(val_dict, offset="", indent=indent)) + sum_list.append(ColumnValueSummary._get_value_string( + val_dict, offset="", indent=indent)) return sum_list def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): @@ -236,7 +273,8 @@ def _get_categorical_col(self, entry, count_dict, offset="", indent=" "): # Create and partition the list of individual entries value_list = [f"{item[0]}{str(item[1])}" for item in entry[1].items()] value_list = value_list[:num_disp] - part_list = ColumnValueSummary.partition_list(value_list, self.op.values_per_line) + part_list = ColumnValueSummary.partition_list( + value_list, self.op.values_per_line) return col_list + [f"{offset}{indent * 3}{ColumnValueSummary.get_list_str(item)}" for item in part_list] @staticmethod @@ -266,5 +304,6 @@ def _get_value_string(val_dict, offset="", indent=""): @staticmethod def sort_dict(count_dict, reverse=False): - sorted_tuples = sorted(count_dict.items(), key=lambda x: x[1][0], reverse=reverse) + sorted_tuples = sorted( + count_dict.items(), key=lambda x: x[1][0], reverse=reverse) return len(sorted_tuples), sorted_tuples diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 28b0c6e55..e7610da79 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -18,14 +18,23 @@ class SummarizeDefinitionsOp(BaseOp): """ PARAMS = { - "operation": "summarize_definitions", - "required_parameters": { - "summary_name": str, - "summary_filename": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = 'type_defs' @@ -75,7 +84,8 @@ def do_op(self, dispatcher, df, name, sidecar=None): class DefinitionSummary(BaseSummary): def 
__init__(self, sum_op, hed_schema, known_defs=None): super().__init__(sum_op) - self.def_gatherer = DefExpandGatherer(hed_schema, known_defs=known_defs) + self.def_gatherer = DefExpandGatherer( + hed_schema, known_defs=known_defs) def update_summary(self, new_info): """ Update the summary for a given tabular input file. @@ -87,8 +97,10 @@ def update_summary(self, new_info): - The summary needs a "name" str, a "schema" and a "Sidecar". """ - data_input = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) - series, def_dict = data_input.series_a, data_input.get_def_dict(new_info['schema']) + data_input = TabularInput( + new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + series, def_dict = data_input.series_a, data_input.get_def_dict( + new_info['schema']) self.def_gatherer.process_def_expands(series, def_dict) @staticmethod @@ -101,8 +113,10 @@ def _build_summary_dict(items_dict, title, process_func, display_description=Fal if "#" in str(value): key = key + "/#" if display_description: - description, value = DefinitionSummary._remove_description(value) - items[key] = {"description": description, "contents": str(value)} + description, value = DefinitionSummary._remove_description( + value) + items[key] = {"description": description, + "contents": str(value)} elif isinstance(value, list): items[key] = [str(x) for x in value] else: @@ -124,7 +138,8 @@ def get_details_dict(self, def_gatherer): display_description=True) ambiguous_defs_summary = self._build_summary_dict(def_gatherer.ambiguous_defs, "Ambiguous Definitions", def_gatherer.get_ambiguous_group) - errors_summary = self._build_summary_dict(def_gatherer.errors, "Errors", None) + errors_summary = self._build_summary_dict( + def_gatherer.errors, "Errors", None) known_defs_summary.update(ambiguous_defs_summary) known_defs_summary.update(errors_summary) @@ -166,7 +181,8 @@ def _nested_dict_to_string(data, indent, level=1): for key, value in data.items(): if isinstance(value, dict): result.append(f"{indent * level}{key}: {len(value)} items") - result.append(DefinitionSummary._nested_dict_to_string(value, indent, level + 1)) + result.append(DefinitionSummary._nested_dict_to_string( + value, indent, level + 1)) elif isinstance(value, list): result.append(f"{indent * level}{key}:") for item in value: diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index ffef53fb7..ab6245f5a 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -27,18 +27,65 @@ class SummarizeHedTagsOp(BaseOp): """ PARAMS = { - "operation": "summarize_hed_tags", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "tags": dict + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "tags": { + "type": "object", + "properties": { + "Sensory events": { + "type": "array", + "items": { + "type": "string" + } + }, + "Agent actions": { + "type": "array", + "items": { + "type": "string" + } + }, + "Objects": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "Sensory events", + "Agent actions", + "Objects" + ], + "additionalProperties": False + }, + "append_timecode": { + "type": "boolean" + }, + "include_context": { + "type": "boolean" + }, + "replace_defs": { + "type": "boolean" + }, + "remove_types": { + "type": "array", + "items": { + "type": 
"string" + } + } }, - "optional_parameters": { - "append_timecode": bool, - "include_context": bool, - "replace_defs": bool, - "remove_types": list - } + "required": [ + "summary_name", + "summary_filename", + "tags" + ], + "additionalProperties": False } SUMMARY_TYPE = "hed_tag_summary" @@ -64,7 +111,8 @@ def __init__(self, parameters): self.append_timecode = parameters.get('append_timecode', False) self.include_context = parameters.get('include_context', True) self.replace_defs = parameters.get("replace_defs", True) - self.remove_types = parameters.get("remove_types", ["Condition-variable", "Task"]) + self.remove_types = parameters.get( + "remove_types", ["Condition-variable", "Task"]) def do_op(self, dispatcher, df, name, sidecar=None): """ Summarize the HED tags present in the dataset. @@ -108,11 +156,13 @@ def update_summary(self, new_info): - The summary needs a "name" str, a "schema", a "df, and a "Sidecar". """ - counts = HedTagCounts(new_info['name'], total_events=len(new_info['df'])) - input_data = TabularInput(new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) - tag_man = HedTagManager(EventManager(input_data, new_info['schema']), + counts = HedTagCounts( + new_info['name'], total_events=len(new_info['df'])) + input_data = TabularInput( + new_info['df'], sidecar=new_info['sidecar'], name=new_info['name']) + tag_man = HedTagManager(EventManager(input_data, new_info['schema']), remove_types=self.sum_op.remove_types) - hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, + hed_objs = tag_man.get_hed_objs(include_context=self.sum_op.include_context, replace_defs=self.sum_op.replace_defs) for hed in hed_objs: counts.update_event_counts(hed, new_info['name']) @@ -188,7 +238,8 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Dataset: Total events={result.get('Total events', 0)} " f"Total files={len(result.get('Files', 0))}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) + sum_list = sum_list + \ + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod @@ -204,14 +255,16 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ sum_list = [f"Total events={result.get('Total events', 0)}"] - sum_list = sum_list + HedTagSummary._get_tag_list(result, indent=indent) + sum_list = sum_list + \ + HedTagSummary._get_tag_list(result, indent=indent) return "\n".join(sum_list) @staticmethod def _tag_details(tags): tag_list = [] for tag in tags: - tag_list.append(f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") + tag_list.append( + f"{tag['tag']}[{tag['events']},{len(tag['files'])}]") return tag_list @staticmethod @@ -221,10 +274,12 @@ def _get_tag_list(result, indent=BaseSummary.DISPLAY_INDENT): for category, tags in tag_info['Main tags'].items(): sum_list.append(f"{indent}{indent}{category}:") if tags: - sum_list.append(f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") + sum_list.append( + f"{indent}{indent}{indent}{' '.join(HedTagSummary._tag_details(tags))}") if tag_info['Other tags']: sum_list.append(f"{indent}Other tags[events,files]:") - sum_list.append(f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") + sum_list.append( + f"{indent}{indent}{' '.join(HedTagSummary._tag_details(tag_info['Other tags']))}") return sum_list @staticmethod diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 
6aaa4c7ea..8d83c9345 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -23,15 +23,27 @@ class SummarizeHedTypeOp(BaseOp): """ PARAMS = { - "operation": "summarize_hed_type", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "type_tag": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "type_tag": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename", + "type_tag" + ], + "additionalProperties": False } SUMMARY_TYPE = 'hed_type_summary' @@ -102,10 +114,13 @@ def update_summary(self, new_info): sidecar = new_info['sidecar'] if sidecar and not isinstance(sidecar, Sidecar): sidecar = Sidecar(sidecar) - input_data = TabularInput(new_info['df'], sidecar=sidecar, name=new_info['name']) - type_values = HedType(EventManager(input_data, new_info['schema']), new_info['name'], type_tag=self.type_tag) + input_data = TabularInput( + new_info['df'], sidecar=sidecar, name=new_info['name']) + type_values = HedType(EventManager( + input_data, new_info['schema']), new_info['name'], type_tag=self.type_tag) counts = HedTypeCounts(new_info['name'], self.type_tag) - counts.update_summary(type_values.get_summary(), type_values.total_events, new_info['name']) + counts.update_summary(type_values.get_summary(), + type_values.total_events, new_info['name']) counts.add_descriptions(type_values.type_defs) self.summary_dict[new_info["name"]] = counts @@ -183,10 +198,13 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): if item['direct_references']: str1 = str1 + f" Direct references:{item['direct_references']}" if item['events_with_multiple_refs']: - str1 = str1 + f" Multiple references:{item['events_with_multiple_refs']})" + str1 = str1 + \ + f" Multiple references:{item['events_with_multiple_refs']})" sum_list.append(f"{indent}{key}: {str1}") if item['level_counts']: - sum_list = sum_list + HedTypeSummary._level_details(item['level_counts'], indent=indent) + sum_list = sum_list + \ + HedTypeSummary._level_details( + item['level_counts'], indent=indent) return "\n".join(sum_list) @staticmethod @@ -207,12 +225,14 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): f"Total events={result.get('Total events', 0)}"] for key, item in type_info.items(): - sum_list.append(f"{indent*2}{key}: {item['levels']} levels in {item['events']} events") + sum_list.append( + f"{indent*2}{key}: {item['levels']} levels in {item['events']} events") str1 = "" if item['direct_references']: str1 = str1 + f" Direct references:{item['direct_references']}" if item['events_with_multiple_refs']: - str1 = str1 + f" (Multiple references:{item['events_with_multiple_refs']})" + str1 = str1 + \ + f" (Multiple references:{item['events_with_multiple_refs']})" if str1: sum_list.append(f"{indent*3}{str1}") if item['level_counts']: @@ -227,7 +247,9 @@ def _level_details(level_counts, offset="", indent=""): str1 = f"[{details['events']} events, {details['files']} files]:" level_list.append(f"{offset}{indent*2}{key} {str1}") if details['tags']: - level_list.append(f"{offset}{indent*3}Tags: {str(details['tags'])}") + level_list.append( + f"{offset}{indent*3}Tags: {str(details['tags'])}") if details['description']: - level_list.append(f"{offset}{indent*3}Description: {details['description']}") 
+ level_list.append( + f"{offset}{indent*3}Description: {details['description']}") return level_list diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index cd3fc936b..2b1916c66 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -21,15 +21,26 @@ class SummarizeHedValidationOp(BaseOp): """ PARAMS = { - "operation": "summarize_hed_validation", - "required_parameters": { - "summary_name": str, - "summary_filename": str + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "append_timecode": { + "type": "boolean" + }, + "check_for_warnings": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool, - "check_for_warnings": bool - } + "required": [ + "summary_name", + "summary_filename" + ], + "additionalProperties": False } SUMMARY_TYPE = 'hed_validation' @@ -105,14 +116,23 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT): sum_list = [f"{name}: [{len(specifics['sidecar_files'])} sidecar files, " f"{len(specifics['event_files'])} event files]"] if specifics.get('is_merged'): - sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=True, indent=indent) - sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=True, indent=indent) + sum_list = sum_list + \ + self.get_error_list( + specifics['sidecar_issues'], count_only=True, indent=indent) + sum_list = sum_list + \ + self.get_error_list( + specifics['event_issues'], count_only=True, indent=indent) else: - sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], indent=indent*2) + sum_list = sum_list + \ + self.get_error_list( + specifics['sidecar_issues'], indent=indent*2) if specifics['sidecar_had_issues']: - sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2) + sum_list = sum_list + \ + self.get_error_list( + specifics['event_issues'], count_only=False, indent=indent*2) else: - sum_list = sum_list + [f"{indent*2}Event file validation was incomplete because of sidecar errors"] + sum_list = sum_list + \ + [f"{indent*2}Event file validation was incomplete because of sidecar errors"] return "\n".join(sum_list) def update_summary(self, new_info): @@ -127,13 +147,16 @@ def update_summary(self, new_info): sidecar = new_info.get('sidecar', None) if sidecar and not isinstance(sidecar, Sidecar): - sidecar = Sidecar(files=new_info['sidecar'], name=os.path.basename(sidecar)) - results = self._get_sidecar_results(sidecar, new_info, self.check_for_warnings) + sidecar = Sidecar( + files=new_info['sidecar'], name=os.path.basename(sidecar)) + results = self._get_sidecar_results( + sidecar, new_info, self.check_for_warnings) if not results['sidecar_had_issues']: input_data = TabularInput(new_info['df'], sidecar=sidecar) issues = input_data.validate(new_info['schema']) if not self.check_for_warnings: - issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR) + issues = ErrorHandler.filter_issues_by_severity( + issues, ErrorSeverity.ERROR) results['event_issues'][new_info["name"]] = issues results['total_event_issues'] = len(issues) self.summary_dict[new_info["name"]] = results @@ -182,7 +205,8 @@ def _update_events_results(results, ind_results): @staticmethod def _update_sidecar_results(results, ind_results): 
results["total_sidecar_issues"] += ind_results["total_sidecar_issues"] - results["sidecar_files"] = results["sidecar_files"] + ind_results["sidecar_files"] + results["sidecar_files"] = results["sidecar_files"] + \ + ind_results["sidecar_files"] for ikey, errors in ind_results["sidecar_issues"].items(): results["sidecar_issues"][ikey] = errors @@ -203,24 +227,29 @@ def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDE elif not len(item): error_list.append(f"{indent}{key} has no issues") else: - HedValidationSummary._format_errors(error_list, key, item, indent) + HedValidationSummary._format_errors( + error_list, key, item, indent) return error_list @staticmethod def _format_errors(error_list, name, errors, indent): error_list.append(f"{indent}{name} issues:") for this_item in errors: - error_list.append(f"{indent * 2}{HedValidationSummary._format_error(this_item)}") + error_list.append( + f"{indent * 2}{HedValidationSummary._format_error(this_item)}") @staticmethod def _format_error(error): error_str = error['code'] error_locations = [] - HedValidationSummary.update_error_location(error_locations, "row", "ec_row", error) - HedValidationSummary.update_error_location(error_locations, "column", "ec_column", error) + HedValidationSummary.update_error_location( + error_locations, "row", "ec_row", error) + HedValidationSummary.update_error_location( + error_locations, "column", "ec_column", error) HedValidationSummary.update_error_location(error_locations, "sidecar column", "ec_sidecarColumnName", error) - HedValidationSummary.update_error_location(error_locations, "sidecar key", "ec_sidecarKeyName", error) + HedValidationSummary.update_error_location( + error_locations, "sidecar key", "ec_sidecarKeyName", error) location_str = ",".join(error_locations) if location_str: error_str = error_str + f"[{location_str}]" @@ -241,7 +270,8 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results["sidecar_files"].append(sidecar.name) results["sidecar_issues"][sidecar.name] = [] sidecar_issues = sidecar.validate(new_info['schema']) - filtered_issues = ErrorHandler.filter_issues_by_severity(sidecar_issues, ErrorSeverity.ERROR) + filtered_issues = ErrorHandler.filter_issues_by_severity( + sidecar_issues, ErrorSeverity.ERROR) if filtered_issues: results["sidecar_had_issues"] = True if not check_for_warnings: diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index f584ee1d3..73d67940d 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -20,16 +20,37 @@ class SummarizeSidecarFromEventsOp(BaseOp): """ PARAMS = { - "operation": "summarize_sidecar_from_events", - "required_parameters": { - "summary_name": str, - "summary_filename": str, - "skip_columns": list, - "value_columns": list, + "type": "object", + "properties": { + "summary_name": { + "type": "string" + }, + "summary_filename": { + "type": "string" + }, + "skip_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "value_columns": { + "type": "array", + "items": { + "type": "string" + } + }, + "append_timecode": { + "type": "boolean" + } }, - "optional_parameters": { - "append_timecode": bool - } + "required": [ + "summary_name", + "summary_filename", + "skip_columns", + "value_columns" + ], + "additionalProperties": False } SUMMARY_TYPE = "events_to_sidecar" @@ -78,7 +99,8 @@ def 
do_op(self, dispatcher, df, name, sidecar=None): if not summary: summary = EventsToSidecarSummary(self) dispatcher.summary_dicts[self.summary_name] = summary - summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name}) + summary.update_summary( + {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new @@ -100,7 +122,8 @@ def update_summary(self, new_info): """ - tab_sum = TabularSummary(value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) + tab_sum = TabularSummary( + value_cols=self.value_cols, skip_cols=self.skip_cols, name=new_info["name"]) tab_sum.update(new_info['df'], new_info['name']) self.summary_dict[new_info["name"]] = tab_sum diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py index 8753ed1d6..52cf41162 100644 --- a/hed/tools/remodeling/operations/valid_operations.py +++ b/hed/tools/remodeling/operations/valid_operations.py @@ -1,6 +1,6 @@ """ The valid operations for the remodeling tools. """ -# from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp +from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp from hed.tools.remodeling.operations.factor_hed_tags_op import FactorHedTagsOp from hed.tools.remodeling.operations.factor_hed_type_op import FactorHedTypeOp @@ -27,8 +27,8 @@ 'factor_hed_tags': FactorHedTagsOp, 'factor_hed_type': FactorHedTypeOp, 'merge_consecutive': MergeConsecutiveOp, - 'number_groups_op': NumberGroupsOp, - 'number_rows_op': NumberRowsOp, + 'number_groups': NumberGroupsOp, + 'number_rows': NumberRowsOp, 'remap_columns': RemapColumnsOp, 'remove_columns': RemoveColumnsOp, 'remove_rows': RemoveRowsOp, @@ -38,8 +38,8 @@ 'summarize_column_names': SummarizeColumnNamesOp, 'summarize_column_values': SummarizeColumnValuesOp, 'summarize_definitions': SummarizeDefinitionsOp, - 'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp, - 'summarize_hed_type': SummarizeHedTypeOp, 'summarize_hed_tags': SummarizeHedTagsOp, - 'summarize_hed_validation': SummarizeHedValidationOp + 'summarize_hed_type': SummarizeHedTypeOp, + 'summarize_hed_validation': SummarizeHedValidationOp, + 'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp } diff --git a/hed/tools/remodeling/resources/remodeler_schema.json b/hed/tools/remodeling/resources/remodeler_schema.json index e44a25eca..e4a40ed68 100644 --- a/hed/tools/remodeling/resources/remodeler_schema.json +++ b/hed/tools/remodeling/resources/remodeler_schema.json @@ -48,7 +48,7 @@ "if": { "properties": { "operation": { - "const": "convert_column" + "const": "convert_columns" } }, "required": [ @@ -538,7 +538,7 @@ "type": "string" } }, - "minProperties":1 + "minProperties": 1 }, "ignore_missing": { "type": "boolean" @@ -730,6 +730,15 @@ "items": { "type": "string" } + }, + "append_timecode": { + "type": "boolean" + }, + "max_categorical": { + "type": "integer" + }, + "values_per_line": { + "type": "integer" } }, "required": [ @@ -825,6 +834,21 @@ "Objects" ], "additionalProperties": false + }, + "append_timecode": { + "type": "boolean" + }, + "include_context": { + "type": "boolean" + }, + "replace_defs": { + "type": "boolean" + }, + "remove_types": { + "type": "array", + "items": { + "type": "string" + } } }, "required": [ @@ -861,6 +885,9 @@ }, "type_tag": { "type": "string" + }, + "append_timecode": { + "type": "boolean" } }, "required": [ @@ -895,6 +922,9 @@ 
"summary_filename": { "type": "string" }, + "append_timecode": { + "type": "boolean" + }, "check_for_warnings": { "type": "boolean" } @@ -913,7 +943,7 @@ "if": { "properties": { "operation": { - "const": "summarize_hed_type" + "const": "summarize_sidecar_from_events" } }, "required": [ @@ -931,17 +961,17 @@ "summary_filename": { "type": "string" }, - "type_tag": { - "type": "string" - }, "append_timecode": { "type": "boolean" + }, + "check_for_warnings": { + "type": "boolean" } }, "required": [ "summary_name", "summary_filename", - "type_tag" + "check_for_warnings" ], "additionalProperties": false } diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py index 2da9ef0b6..c62c7eccd 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/validator.py @@ -1,48 +1,100 @@ import os import json +from copy import deepcopy from jsonschema import Draft7Validator from jsonschema.exceptions import ErrorTree +from hed.tools.remodeling.operations.valid_operations import valid_operations + class RemodelerValidator(): - def __init__(self): - with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../remodeling/resources/remodeler_schema.json')), 'r') as fp: - self.validator = Draft7Validator(json.load(fp)) - - self.message_templates = { - "0": { - "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.", - "type": "Operations must be contained in a list or array. This is also true when you run a single operation." - }, - "1": { - "type": "Each operation must be defined in a dictionary. {instance} is not a dictionary object.", - "required": "Operation dictionary {operation_index} is missing '{missing_value}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.", - "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field '{added_property}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters." + MESSAGE_STRINGS = { + "0": { + "minItems": "There are no operations defined. Specify at least 1 operation for the remodeler to execute.", + "type": "Operations must be contained in a list or array. This is also true when you run a single operation." + }, + "1": { + "type": "Each operation must be defined in a dictionary. {instance} is not a dictionary object.", + "required": "Operation dictionary {operation_index} is missing '{missing_value}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters.", + "additionalProperties": "Operation dictionary {operation_index} contains an unexpected field '{added_property}'. Every operation dictionary must specify the type of operation, a description, and the operation parameters." + }, + "2": { + "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.", + "enum": "{instance} is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.", + "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'." 
+ }, + "more": { + "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", + "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", + "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", + "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'." + + } + } + + BASE_ARRAY = { + "type": "array", + "items": {}, + "minItems": 1 + } + + OPERATION_DICT = { + "type": "object", + "required": [ + "operation", + "description", + "parameters" + ], + "additionalProperties": False, + "properties": { + "operation": { + "type": "string", + "enum": [], + "default": "convert_columns" }, - "2": { - "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.", - "enum": "{instance} is not a known remodeler operation. Accepted remodeler operations can be found in the documentation.", - "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'." + "description": { + "type": "string" }, - "more": { - "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", - "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", - "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'." 
- + "parameters": { + "type": "object", + "properties": {} } + }, + "allOf": [] + } + + PARAMETER_SPECIFICATION_TEMPLATE = { + "if": { + "properties": { + "operation": { + "const": "" + } + }, + "required": [ + "operation" + ] + }, + "then": { + "properties": { + "parameters": {} } + } + } + + def __init__(self): + self.validator = Draft7Validator(self._construct_schema()) def validate(self, operations): list_of_error_strings = [] for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path): - list_of_error_strings.append(self._parse_message(error, operations)) + list_of_error_strings.append( + self._parse_message(error, operations)) return list_of_error_strings def _parse_message(self, error, operations): ''' Return a user friendly error message based on the jsonschema validation error - + args: - errors: a list of errors returned from the validator - operations: the operations that were put in @@ -54,34 +106,56 @@ def _parse_message(self, error, operations): ''' error_dict = vars(error) print(error_dict) - + level = len(error_dict["path"]) if level > 2: level = "more" - # some information is in the validation error but not directly in a field so I need to + # some information is in the validation error but not directly in a field so I need to # modify before they can parsed in - # if they are necessary, they are there, if they are not there, they are not necessary + # if they are necessary, they are there, if they are not there, they are not necessary try: error_dict["operation_index"] = error_dict["path"][0] + 1 error_dict["operation_field"] = error_dict["path"][1].capitalize() - error_dict["operation_name"] = operations[int(error_dict["path"][0])]['operation'] - parameter_path = [*error_dict['path']][:1:-1] #everything except the first two values reversed + error_dict["operation_name"] = operations[int( + error_dict["path"][0])]['operation'] + # everything except the first two values reversed + parameter_path = [*error_dict['path']][:1:-1] for ind, value in enumerate(parameter_path): if isinstance(value, int): parameter_path[ind] = f"item {value+1}" error_dict["parameter_path"] = ", ".join(parameter_path) except (IndexError, TypeError, KeyError): pass - + type = str(error_dict["validator"]) - - # the missing value with required elements, or the wrong additional value is not known to the + + # the missing value with required elements, or the wrong additional value is not known to the # validation error object # this is a known issue of jsonschema: https://github.com/python-jsonschema/jsonschema/issues/119 # for now the simplest thing seems to be to extract it from the error message if type == 'required': - error_dict["missing_value"] = error_dict["message"].split("'")[1::2][0] + error_dict["missing_value"] = error_dict["message"].split("'")[ + 1::2][0] if type == 'additionalProperties': - error_dict["added_property"] = error_dict["message"].split("'")[1::2][0] + error_dict["added_property"] = error_dict["message"].split("'")[ + 1::2][0] + + return self.MESSAGE_STRINGS[str(level)][type].format(**error_dict) + + def _construct_schema(self): + + schema = deepcopy(self.BASE_ARRAY) + schema["items"] = deepcopy(self.OPERATION_DICT) + + for operation in valid_operations.items(): + schema["items"]["properties"]["operation"]["enum"].append(operation[0]) + + parameter_specification = deepcopy(self.PARAMETER_SPECIFICATION_TEMPLATE) + parameter_specification["if"]["properties"]["operation"]["const"] = operation[0] + parameter_specification["then"]["properties"]["parameters"] = 
operation[1].PARAMS + #print(parameter_specification) + + schema["items"]["allOf"].append(deepcopy(parameter_specification)) + #print(schema) - return self.message_templates[str(level)][type].format(**error_dict) \ No newline at end of file + return schema diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 83b10a7a5..762af0091 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -19,11 +19,6 @@ def setUpClass(cls): def tearDownClass(cls): pass - def test_load_schema(self): - with open(self.remodeler_schema_path) as f: - schema = json.load(f) - Draft7Validator.check_schema(schema) - def test_validate_valid(self): validator = RemodelerValidator() error_strings = validator.validate(self.remodel_file) From f4644a559d69495a6ef7b4a0bbc6d288bd8b4c47 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 30 Oct 2023 13:23:02 +0100 Subject: [PATCH 04/20] add operation name --- hed/tools/remodeling/operations/convert_columns_op.py | 3 ++- hed/tools/remodeling/operations/factor_column_op.py | 3 ++- hed/tools/remodeling/operations/factor_hed_tags_op.py | 3 ++- hed/tools/remodeling/operations/factor_hed_type_op.py | 3 ++- hed/tools/remodeling/operations/merge_consecutive_op.py | 2 ++ hed/tools/remodeling/operations/number_groups_op.py | 3 ++- hed/tools/remodeling/operations/number_rows_op.py | 2 ++ hed/tools/remodeling/operations/remap_columns_op.py | 1 + hed/tools/remodeling/operations/remove_columns_op.py | 3 ++- hed/tools/remodeling/operations/remove_rows_op.py | 3 ++- hed/tools/remodeling/operations/rename_columns_op.py | 3 ++- hed/tools/remodeling/operations/reorder_columns_op.py | 3 ++- hed/tools/remodeling/operations/split_rows_op.py | 3 ++- hed/tools/remodeling/operations/summarize_column_names_op.py | 3 ++- hed/tools/remodeling/operations/summarize_column_values_op.py | 3 ++- hed/tools/remodeling/operations/summarize_definitions_op.py | 3 ++- hed/tools/remodeling/operations/summarize_hed_tags_op.py | 3 ++- hed/tools/remodeling/operations/summarize_hed_type_op.py | 2 ++ hed/tools/remodeling/operations/summarize_hed_validation_op.py | 3 ++- .../remodeling/operations/summarize_sidecar_from_events_op.py | 3 ++- 20 files changed, 39 insertions(+), 16 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 4e2f68d17..1fbd39f20 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -13,7 +13,8 @@ class ConvertColumnsOp(BaseOp): """ - + NAME = "convert_columns" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 3c8fbb5af..af08db511 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -16,7 +16,8 @@ class FactorColumnOp(BaseOp): """ - + NAME = "factor_column" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index e22755db7..e670172d2 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -25,7 +25,8 @@ class FactorHedTagsOp(BaseOp): - When the context is expanded, the effect of events for temporal extent is accounted for. 
- Context expansion is not implemented in the current version. """ - + NAME = "factor_hed_tags" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index ccf519858..a2df3e446 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -18,7 +18,8 @@ class FactorHedTypeOp(BaseOp): - **type_values** (*list*): Factor values to include. If empty all values of that type_tag are used. """ - + NAME = "factor_hed_type" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 7ad0f9a75..082bda981 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -15,6 +15,8 @@ class MergeConsecutiveOp(BaseOp): - **ignore_missing** (*bool*): If true, missing match_columns are ignored. """ + NAME = "merge_consecutive" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index fcc55f6dd..319439c88 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -8,7 +8,8 @@ class NumberGroupsOp(BaseOp): """ Implementation in progress. """ - + NAME = "number_groups" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index e79d77159..507f41f3c 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -7,6 +7,8 @@ class NumberRowsOp(BaseOp): """ Implementation in progress. """ + NAME = "number_rows" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index a77e98859..34c84edd5 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -24,6 +24,7 @@ class RemapColumnsOp(BaseOp): TODO: Allow wildcards """ + NAME = "remap_columns" PARAMS = { "type": "object", diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 8c98c9a81..952ce41ab 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -10,7 +10,8 @@ class RemoveColumnsOp(BaseOp): - **ignore_missing** (*boolean*): If true, names in remove_names that are not columns in df should be ignored. """ - + NAME = "remove_columns" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 9654aea8b..9a8018e67 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -11,7 +11,8 @@ class RemoveRowsOp(BaseOp): - **remove_values** (*list*): The values to test for row removal. 
""" - + NAME = "remove_rows" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 023339cd2..661991fb8 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -11,7 +11,8 @@ class RenameColumnsOp (BaseOp): - **ignore_missing** (*bool*): If true, the names in remove_names that are not columns and should be ignored. """ - + NAME = "rename_columns" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 983450c11..09f2116e2 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -11,7 +11,8 @@ class ReorderColumnsOp(BaseOp): - keep_others (*bool*): If true, columns not in column_order are placed at end. """ - + NAME = "reorder_columns" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 36f96d098..0190cc543 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -14,7 +14,8 @@ class SplitRowsOp(BaseOp): - **remove_parent_row** (*bool*): If true, the original row that was split is removed. """ - + NAME = "split_rows" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 31752993b..b392d1a72 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -15,7 +15,8 @@ class SummarizeColumnNamesOp(BaseOp): The purpose is to check that all the tabular files have the same columns in same order. """ - + NAME = "summarize_column_names" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 275b68d19..9f691fdab 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -20,7 +20,8 @@ class SummarizeColumnValuesOp(BaseOp): The purpose is to produce a summary of the values in a tabular file. """ - + NAME = "summarize_column_values" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index e7610da79..7f1e54572 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -16,7 +16,8 @@ class SummarizeDefinitionsOp(BaseOp): The purpose is to produce a summary of the values in a tabular file. 
""" - + NAME = "summarize_definitions" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index ab6245f5a..bf780e7b0 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -25,7 +25,8 @@ class SummarizeHedTagsOp(BaseOp): """ - + NAME = "summarize_hed_tags" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 8d83c9345..1811beb7a 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -22,6 +22,8 @@ class SummarizeHedTypeOp(BaseOp): """ + NAME = "summarize_hed_type" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 2b1916c66..854099e07 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -19,7 +19,8 @@ class SummarizeHedValidationOp(BaseOp): The purpose of this op is to produce a summary of the HED validation errors in a file. """ - + NAME = "summarize_hed_validation" + PARAMS = { "type": "object", "properties": { diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 73d67940d..c5549d6b5 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -18,7 +18,8 @@ class SummarizeSidecarFromEventsOp(BaseOp): The purpose is to produce a JSON sidecar template for annotating a dataset with HED tags. """ - + NAME = "summarize_sidecar_from_events" + PARAMS = { "type": "object", "properties": { From 1fe63e384ab1c059c8606c33fe95f2df7b8d5d47 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 14 Nov 2023 13:50:06 +0100 Subject: [PATCH 05/20] remove original validation functions and tests --- hed/tools/remodeling/dispatcher.py | 34 ++-------- hed/tools/remodeling/operations/base_op.py | 62 ++----------------- .../operations/rename_columns_op.py | 2 +- .../remodeling/operations/test_base_op.py | 25 -------- tests/tools/remodeling/test_dispatcher.py | 53 ---------------- tests/tools/remodeling/test_validator.py | 3 + 6 files changed, 13 insertions(+), 166 deletions(-) diff --git a/hed/tools/remodeling/dispatcher.py b/hed/tools/remodeling/dispatcher.py index 2bfb90b3f..039b05f27 100644 --- a/hed/tools/remodeling/dispatcher.py +++ b/hed/tools/remodeling/dispatcher.py @@ -22,7 +22,7 @@ def __init__(self, operation_list, data_root=None, """ Constructor for the dispatcher. Parameters: - operation_list (list): List of unparsed operations. + operation_list (list): List of valid unparsed operations. data_root (str or None): Root directory for the dataset. If none, then backups are not made. hed_versions (str, list, HedSchema, or HedSchemaGroup): The HED schema. 
@@ -42,11 +42,7 @@ def __init__(self, operation_list, data_root=None, raise HedFileError("BackupDoesNotExist", f"Remodeler cannot be run with a dataset without first creating the " f"{self.backup_name} backup for {self.data_root}", "") - op_list, errors = self.parse_operations(operation_list) - if errors: - these_errors = self.errors_to_str(errors, 'Dispatcher failed due to invalid operations') - raise ValueError("InvalidOperationList", f"{these_errors}") - self.parsed_ops = op_list + self.parsed_ops = self.parse_operations(operation_list) self.hed_schema = self.get_schema(hed_versions) self.summary_dicts = {} @@ -183,31 +179,11 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s @staticmethod def parse_operations(operation_list): - errors = [] operations = [] for index, item in enumerate(operation_list): - try: - if not isinstance(item, dict): - raise TypeError("InvalidOperationFormat", - f"Each operations must be a dictionary but operation {str(item)} is {type(item)}") - if "operation" not in item: - raise KeyError("MissingOperation", - f"operation {str(item)} does not have a operation key") - if "parameters" not in item: - raise KeyError("MissingParameters", - f"Operation {str(item)} does not have a parameters key") - if item["operation"] not in valid_operations: - raise KeyError("OperationNotListedAsValid", - f"Operation {item['operation']} must be added to operations_list " - f"before it can be executed.") - new_operation = valid_operations[item["operation"]](item["parameters"]) - operations.append(new_operation) - except Exception as ex: - errors.append({"index": index, "item": f"{item}", "error_type": type(ex), - "error_code": ex.args[0], "error_msg": ex.args[1]}) - if errors: - return [], errors - return operations, [] + new_operation = valid_operations[item["operation"]](item["parameters"]) + operations.append(new_operation) + return operations @staticmethod def prep_data(df): diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index bc3e906c6..9624d3f0a 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -8,7 +8,7 @@ class BaseOp: """ - def __init__(self, op_spec, parameters): + def __init__(self, name, parameters): """ Base class constructor for operations. Parameters: @@ -26,46 +26,10 @@ def __init__(self, op_spec, parameters): - If the specification is missing a valid operation. """ - self.operation = op_spec.get("operation", "") - if not self.operation: - raise ValueError("OpMustHaveOperation", "Op must have operation is empty") - self.required_params = op_spec.get("required_parameters", {}) - self.optional_params = op_spec.get("optional_parameters", {}) - self.check_parameters(parameters) - - def check_parameters(self, parameters): - """ Verify that the parameters meet the operation specification. - - Parameters: - parameters (dict): Dictionary of parameters for this operation. - - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- - """ - - required = set(self.required_params.keys()) - required_missing = required.difference(set(parameters.keys())) - if required_missing: - raise KeyError("MissingRequiredParameters", - f"{self.operation} requires parameters {list(required_missing)}") - for param_name, param_value in parameters.items(): - if param_name in self.required_params: - param_type = self.required_params[param_name] - elif param_name in self.optional_params: - param_type = self.optional_params[param_name] - else: - raise KeyError("BadParameter", - f"{param_name} not a required or optional parameter for {self.operation}") - if isinstance(param_type, list): - self._check_list_type(param_value, param_type) - elif not isinstance(param_value, param_type): - raise TypeError("BadType", f"{param_value} has type {type(param_value)} not {param_type}") + self.name = name + self.parameters = parameters + def do_op(self, dispatcher, df, name, sidecar=None): """ Base class method to be overridden by each operation. @@ -78,21 +42,3 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ return df.copy() - - @staticmethod - def _check_list_type(param_value, param_type): - """ Check a parameter value against its specified type. - - Parameters: - param_value (any): The value to be checked. - param_type (any): Class to check the param_value against. - - :raises TypeError: - - If param_value is not an instance of param_type. - - """ - - for this_type in param_type: - if isinstance(param_value, this_type): - return - raise TypeError("BadType", f"{param_value} has type {type(param_value)} which is not in {str(param_type)}") diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 661991fb8..936af4e48 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -50,7 +50,7 @@ def __init__(self, parameters): - If a parameter has the wrong type. 
""" - super().__init__(self.PARAMS, parameters) + super().__init__("rename_columns", parameters) self.column_mapping = parameters['column_mapping'] if parameters['ignore_missing']: self.error_handling = 'ignore' diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index 096a0ba12..bbcab9002 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -65,30 +65,5 @@ def test_constructor_no_parameters(self): op1 = BaseOp({"operation": "no_parameter_operation"}, {}) self.assertIsInstance(op1, BaseOp, "constructor allows a operation with no parameters") - def test_check_parameters_bad_element(self): - parameters = json.loads(self.json_parameters) - parameters["is_multiple"] = {"a": 1, "b": 2} - with self.assertRaises(TypeError) as context: - BaseOp(self.params, parameters) - self.assertEqual(context.exception.args[0], 'BadType') - - def test_parse_operations_missing_required(self): - parameters = json.loads(self.json_parameters) - parameters.pop("is_string") - with self.assertRaises(KeyError) as context: - BaseOp(TestOp.PARAMS, parameters) - self.assertEqual(context.exception.args[0], 'MissingRequiredParameters') - - def test_check_parameters_test(self): - parameters1 = {"op_name": "test", "skip_columns": ["onset", "duration"], "keep_all": True, "junk": "bad_parm"} - with self.assertRaises(KeyError) as context1: - BaseOp(TestOp.PARAMS, parameters1) - self.assertEqual(context1.exception.args[0], 'BadParameter') - parameters2 = {"op_name": "test", "skip_columns": ["onset", "duration"], "keep_all": "true"} - with self.assertRaises(TypeError) as context2: - TestOp(parameters2) - self.assertEqual(context2.exception.args[0], 'BadType') - - if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/test_dispatcher.py b/tests/tools/remodeling/test_dispatcher.py index b91a3c6f8..ad6a01888 100644 --- a/tests/tools/remodeling/test_dispatcher.py +++ b/tests/tools/remodeling/test_dispatcher.py @@ -57,17 +57,6 @@ def test_constructor_empty_operations(self): self.assertIsInstance(disp, Dispatcher, "") self.assertFalse(disp.parsed_ops, "constructor empty operations list has empty parsed ops") - def test_constructor_bad_no_operation(self): - test = [{"operation": "remove_rows", "parameters": {"column_name": "response_time", "remove_values": ["n/a"]}}, - {"parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}] - operations, errors = Dispatcher.parse_operations(test) - self.assertFalse(operations, "parse_operations returns empty if no operation") - self.assertEqual(len(errors), 1, - "parse_operation returns a list of one error if one operation with no operation") - self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing operation") - self.assertEqual(errors[0]['error_type'], KeyError, - "parse_operation error has the correct type for missing operation") - def test_get_data_file(self): model_path1 = os.path.join(self.data_path, 'simple_reorder_rmdl.json') with open(model_path1) as fp: @@ -92,46 +81,6 @@ def test_get_summary_save_dir(self): dispatch2.get_summary_save_dir() self.assertEqual(context.exception.code, 'NoDataRoot') - def test_parse_operations_errors(self): - test = [{"operation": "remove_rows", "parameters": {"column_name": "response_time", "remove_values": ["n/a"]}}, - {"operation": "remove_rows"}] - operations, errors = Dispatcher.parse_operations(test) - 
self.assertFalse(operations, "parse_operations returns empty if no parameters") - self.assertEqual(len(errors), 1, - "parse_operation returns a list of one error if one operation with no parameters") - self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing parameters") - self.assertEqual(errors[0]['error_type'], KeyError, - "parse_operation error has the correct type for missing parameters") - - test = [{"operation": "remove_rows", - "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}, - {"operation": "remove_rows", "parameters": {"column_name": "response_time"}}] - operations, errors = Dispatcher.parse_operations(test) - self.assertFalse(operations, "parse_operations returns empty if missing required") - self.assertEqual(len(errors), 1, - "parse_operation returns a list of one error if one operation with missing required") - self.assertEqual(errors[0]['index'], 1, "parse_operation error has the correct index for missing parameters") - self.assertEqual(errors[0]['error_type'], KeyError, - "parse_operation error has the correct type for missing required") - with self.assertRaises(ValueError) as context: - Dispatcher(test) - self.assertEqual(context.exception.args[0], 'InvalidOperationList') - - test2 = [{"operation": "blimey", - "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}] - operations, errors = Dispatcher.parse_operations(test2) - self.assertFalse(operations, "parse_operations returns empty if missing required") - self.assertEqual(len(errors), 1, - "parse_operation returns a list of one error if bad operation") - self.assertEqual(errors[0]['index'], 0, "parse_operation error has the correct index for bad operation") - self.assertEqual(errors[0]['error_type'], KeyError, - "parse_operation error has the correct type for bad operation") - self.assertEqual(errors[0]['error_code'], 'OperationNotListedAsValid''', - "parse_operation error has has correct code for bad operation") - with self.assertRaises(ValueError) as context: - Dispatcher(test2) - self.assertEqual(context.exception.args[0], 'InvalidOperationList') - def test_parse_operation_list(self): test = [{"operation": "remove_rows", "parameters": {"column_name": "trial_type", "remove_values": ["succesful_stop", "unsuccesful_stop"]}}, @@ -176,8 +125,6 @@ def test_run_operations_hed(self): } } ] - operations, errors = Dispatcher.parse_operations(op_list) - self.assertFalse(errors) dispatch = Dispatcher(op_list, hed_versions=['8.1.0']) df = dispatch.run_operations(events_path, sidecar=sidecar_path, verbose=False) self.assertIsInstance(df, pd.DataFrame) diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 762af0091..c00293261 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -19,6 +19,9 @@ def setUpClass(cls): def tearDownClass(cls): pass + def test_validator_build(self): + validator = RemodelerValidator() + def test_validate_valid(self): validator = RemodelerValidator() error_strings = validator.validate(self.remodel_file) From fd09c2c13068794556af6479250bc578af60b3f2 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 21 Nov 2023 11:48:26 +0100 Subject: [PATCH 06/20] change base op to abstract and write tests --- hed/tools/remodeling/operations/base_op.py | 34 ++++----- .../remodeling/operations/test_base_op.py | 69 +++++++++---------- 2 files changed, 45 insertions(+), 58 deletions(-) 
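With this change BaseOp becomes an abstract base class: a concrete operation only has to supply a NAME string, a PARAMS dict holding its JSON-schema fragment, and a do_op override. A minimal sketch of that shape follows before the diff (the DropNaOnsetOp class and its column_name parameter are hypothetical, not part of this patch series):

    from hed.tools.remodeling.operations.base_op import BaseOp


    class DropNaOnsetOp(BaseOp):
        """ Hypothetical operation that drops rows whose chosen column holds n/a. """

        NAME = "drop_na_onset"      # concrete value for the abstract NAME property
        PARAMS = {                  # JSON-schema fragment a validator can check parameters against
            "type": "object",
            "properties": {
                "column_name": {"type": "string"}
            },
            "required": ["column_name"],
            "additionalProperties": False
        }

        def do_op(self, dispatcher, df, name, sidecar=None):
            # BaseOp.__init__ has already stored the parameter dictionary on the instance.
            column = self.parameters["column_name"]
            return df[df[column] != "n/a"].reset_index(drop=True)

Leaving out any of the three abstract members makes instantiation fail with a TypeError, which is what the new test_constructor_no_name test exercises for a subclass that omits NAME.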
diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index 9624d3f0a..f8df37715 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -1,35 +1,25 @@ """ Base class for remodeling operations. """ +from abc import ABC, abstractmethod -class BaseOp: +class BaseOp(ABC): """ Base class for operations. All remodeling operations should extend this class. - The base class holds the parameters and does basic parameter checking against the operation's specification. - """ + @property + @abstractmethod + def NAME(self): + pass - def __init__(self, name, parameters): - """ Base class constructor for operations. - - Parameters: - op_spec (dict): Specification for required and optional parameters. - parameters (dict): Actual values of the parameters for the operation. - - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. + @property + @abstractmethod + def PARAMS(self): + pass - :raises TypeError: - - If a parameter has the wrong type. - - :raises ValueError: - - If the specification is missing a valid operation. - - """ - self.name = name + def __init__(self, parameters): self.parameters = parameters - + @abstractmethod def do_op(self, dispatcher, df, name, sidecar=None): """ Base class method to be overridden by each operation. diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index bbcab9002..2bef3e5da 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -4,21 +4,20 @@ class TestOp(BaseOp): + NAME = "test" PARAMS = { - "operation": "test_op", - "required_parameters": { - "op_name": str, - "skip_columns": list, - "keep_all": bool, + "type": "object", + "properties": { + "column_name": { + "type": "string" + } }, - "optional_parameters": { - "keep_columns": list - } + "required": [ + "column_name" + ], + "additionalProperties": False } - def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) - def do_op(self, dispatcher, df, name, sidecar=None): return df @@ -27,21 +26,8 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.params = { - "operation": "test_op", - "required_parameters": { - "is_string": str, - "is_multiple": [str, int, float, list], - "is_bool": bool, - "is_list": list - }, - "optional_parameters": {} - } base_parameters = { - "is_string": "Condition-variable", - "is_multiple": ["a", "b", "c"], - "is_bool": False, - "is_list": [3, 4, 5] + "column_name": "a_descriptive_name" } cls.json_parameters = json.dumps(base_parameters) @@ -51,19 +37,30 @@ def tearDownClass(cls): def test_constructor(self): parameters = json.loads(self.json_parameters) - op1 = BaseOp(self.params, parameters) - self.assertIsInstance(op1, BaseOp, "constructor should create a BaseOp") - self.assertIn("is_string", op1.required_params, - "constructor required_params should contain an expected value") + test_instantiate = TestOp(parameters) + self.assertDictEqual(test_instantiate.parameters, parameters) + - def test_constructor_no_operation(self): - with self.assertRaises(ValueError) as context: - BaseOp({}, {}) - self.assertEqual(context.exception.args[0], 'OpMustHaveOperation') + def test_constructor_no_name(self): + class TestOpNoName(BaseOp): + PARAMS = { + "type": "object", + "properties": { + "column_name": { + "type": "string" + } + }, + "required": [ + "column_name" + ], + 
"additionalProperties": False + } - def test_constructor_no_parameters(self): - op1 = BaseOp({"operation": "no_parameter_operation"}, {}) - self.assertIsInstance(op1, BaseOp, "constructor allows a operation with no parameters") + def do_op(self, dispatcher, df, name, sidecar=None): + return df + + with self.assertRaises(TypeError): + instantiate = TestOpNoName({}) if __name__ == '__main__': unittest.main() From 1e088d2f6f234d65892c9d6010f6d1e0c7257419 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 21 Nov 2023 11:48:36 +0100 Subject: [PATCH 07/20] remove original schema --- .../resources/remodeler_schema.json | 984 ------------------ 1 file changed, 984 deletions(-) delete mode 100644 hed/tools/remodeling/resources/remodeler_schema.json diff --git a/hed/tools/remodeling/resources/remodeler_schema.json b/hed/tools/remodeling/resources/remodeler_schema.json deleted file mode 100644 index e4a40ed68..000000000 --- a/hed/tools/remodeling/resources/remodeler_schema.json +++ /dev/null @@ -1,984 +0,0 @@ -{ - "type": "array", - "items": { - "type": "object", - "required": [ - "operation", - "description", - "parameters" - ], - "additionalProperties": false, - "properties": { - "operation": { - "type": "string", - "enum": [ - "convert_columns", - "factor_column", - "factor_hed_tags", - "factor_hed_type", - "merge_consecutive", - "number_groups", - "number_rows", - "remap_columns", - "remove_columns", - "remove_rows", - "rename_columns", - "reorder_columns", - "split_rows", - "summarize_column_names", - "summarize_column_values", - "summarize_definitions", - "summarize_hed_tags", - "summarize_hed_type", - "summarize_hed_validation", - "summarize_sidecar_from_events" - ], - "default": "convert_columns" - }, - "description": { - "type": "string" - }, - "parameters": { - "type": "object", - "properties": {} - } - }, - "allOf": [ - { - "if": { - "properties": { - "operation": { - "const": "convert_columns" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_names": { - "type": "array" - }, - "convert_to": { - "type": "string" - }, - "decimal_places": { - "type": "integer" - } - }, - "required": [ - "column_names", - "convert_to" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "factor_column" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_name": { - "type": "string" - }, - "factor_values": { - "type": "array", - "items": { - "type": "string" - } - }, - "factor_names": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "column_name", - "factor_values", - "factor_names" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "factor_hed_tags" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "queries": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1 - }, - "query_names": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 1 - }, - "remove_types": { - "type": "array", - "items": { - "type": "string" - }, - "minItems": 0 - }, - "expand_context": { - "type": "boolean" - } - }, - "required": [ - "queries", - "query_names", - "remove_types" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - 
"operation": { - "const": "factor_hed_type" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "type_tag": { - "type": "string" - }, - "type_values": { - "type": "array" - } - }, - "required": [ - "type_tag", - "type_values" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "merge_consecutive" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_name": { - "type": "string" - }, - "event_code": { - "type": [ - "string", - "number" - ] - }, - "match_columns": { - "type": "array", - "items": { - "type": "string" - } - }, - "set_durations": { - "type": "boolean" - }, - "ignore_missing": { - "type": "boolean" - } - }, - "required": [ - "column_name", - "event_code", - "match_columns", - "set_durations", - "ignore_missing" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "number_groups" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "number_column_name": { - "type": "string" - }, - "source_column": { - "type": "string" - }, - "start": { - "type": "object", - "properties": { - "values": { - "type": "array" - }, - "inclusion": { - "type": "string", - "enum": [ - "include", - "exclude" - ] - } - }, - "required": [ - "values", - "inclusion" - ], - "additionalProperties": false - }, - "stop": { - "type": "object", - "properties": { - "values": { - "type": "array" - }, - "inclusion": { - "type": "string", - "enum": [ - "include", - "exclude" - ] - } - }, - "required": [ - "values", - "inclusion" - ], - "additionalProperties": false - }, - "overwrite": { - "type": "boolean" - } - }, - "required": [ - "number_column_name", - "source_column", - "start", - "stop" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "number_rows" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "number_column_name": { - "type": "string" - }, - "overwrite": { - "type": "boolean" - }, - "match_value": { - "type": "object", - "properties": { - "column": { - "type": "string" - }, - "value": { - "type": [ - "string", - "number" - ] - } - }, - "required": [ - "column", - "value" - ], - "additionalProperties": false - } - }, - "required": [ - "number_column_name" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "remap_columns" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "source_columns": { - "type": "array", - "items": { - "type": "string" - } - }, - "destination_columns": { - "type": "array", - "items": { - "type": "string" - } - }, - "map_list": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": [ - "string", - "number" - ] - } - } - }, - "ignore_missing": { - "type": "boolean" - }, - "integer_sources": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "source_columns", - "destination_columns", - "map_list", - "ignore_missing" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "remove_columns" - } - }, - "required": [ - 
"operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_names": { - "type": "array", - "items": { - "type": "string" - } - }, - "ignore_missing": { - "type": "boolean" - } - }, - "required": [ - "column_names", - "ignore_missing" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "remove_rows" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_name": { - "type": "string" - }, - "remove_values": { - "type": "array", - "items": { - "type": [ - "string", - "number" - ] - } - } - }, - "required": [ - "column_name", - "remove_values" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "rename_columns" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_mapping": { - "type": "object", - "patternProperties": { - "(.*?)": { - "type": "string" - } - }, - "minProperties": 1 - }, - "ignore_missing": { - "type": "boolean" - } - }, - "required": [ - "column_mapping", - "ignore_missing" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "reorder_columns" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "column_order": { - "type": "array", - "items": { - "type": "string" - } - }, - "ignore_missing": { - "type": "boolean" - }, - "keep_others": { - "type": "boolean" - } - }, - "required": [ - "column_order", - "ignore_missing", - "keep_others" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "split_rows" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "anchor_column": { - "type": "string" - }, - "new_events": { - "type": "object", - "patternProperties": { - "^[a-zA-Z0-9_]*$": { - "type": "object", - "properties": { - "onset_source": { - "type": "array", - "items": { - "type": [ - "string", - "number" - ] - } - }, - "duration": { - "type": "array", - "items": { - "type": [ - "string", - "number" - ] - } - }, - "copy_columns": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "onset_source", - "duration", - "copy_columns" - ], - "additionalProperties": false - } - }, - "minProperties": 1 - }, - "remove_parent_event": { - "type": "boolean" - } - }, - "required": [ - "remove_parent_event" - ], - "minProperties": 2, - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_column_names" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - } - }, - "required": [ - "summary_name", - "summary_filename" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_column_values" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - }, - "skip_columns": { - "type": 
"array", - "items": { - "type": "string" - } - }, - "value_columns": { - "type": "array", - "items": { - "type": "string" - } - }, - "append_timecode": { - "type": "boolean" - }, - "max_categorical": { - "type": "integer" - }, - "values_per_line": { - "type": "integer" - } - }, - "required": [ - "summary_name", - "summary_filename", - "skip_columns", - "value_columns" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_definitions" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - } - }, - "required": [ - "summary_name", - "summary_filename" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_hed_tags" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - }, - "tags": { - "type": "object", - "properties": { - "Sensory events": { - "type": "array", - "items": { - "type": "string" - } - }, - "Agent actions": { - "type": "array", - "items": { - "type": "string" - } - }, - "Objects": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "Sensory events", - "Agent actions", - "Objects" - ], - "additionalProperties": false - }, - "append_timecode": { - "type": "boolean" - }, - "include_context": { - "type": "boolean" - }, - "replace_defs": { - "type": "boolean" - }, - "remove_types": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "required": [ - "summary_name", - "summary_filename", - "tags" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_hed_type" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - }, - "type_tag": { - "type": "string" - }, - "append_timecode": { - "type": "boolean" - } - }, - "required": [ - "summary_name", - "summary_filename", - "type_tag" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_hed_validation" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - }, - "append_timecode": { - "type": "boolean" - }, - "check_for_warnings": { - "type": "boolean" - } - }, - "required": [ - "summary_name", - "summary_filename", - "check_for_warnings" - ], - "additionalProperties": false - } - } - } - }, - { - "if": { - "properties": { - "operation": { - "const": "summarize_sidecar_from_events" - } - }, - "required": [ - "operation" - ] - }, - "then": { - "properties": { - "parameters": { - "type": "object", - "properties": { - "summary_name": { - "type": "string" - }, - "summary_filename": { - "type": "string" - }, - "append_timecode": { - "type": "boolean" - }, - "check_for_warnings": { - "type": "boolean" - } - }, - "required": [ - "summary_name", - "summary_filename", - "check_for_warnings" - ], - "additionalProperties": false - } - } - } - } - ] - }, - "minItems": 1 -} \ 
No newline at end of file From b4767c4d0318f3d37ff3da6942c9e2b57f6c34ae Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 27 Nov 2023 17:53:30 +0100 Subject: [PATCH 08/20] reorder base_op methods --- hed/tools/remodeling/operations/base_op.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index f8df37715..d6a33b1db 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -3,9 +3,12 @@ from abc import ABC, abstractmethod class BaseOp(ABC): - """ Base class for operations. All remodeling operations should extend this class. + """ Base class for operations. All remodeling operations should extend this class.""" + + def __init__(self, parameters): + """""" + self.parameters = parameters - """ @property @abstractmethod def NAME(self): @@ -16,9 +19,6 @@ def NAME(self): def PARAMS(self): pass - def __init__(self, parameters): - self.parameters = parameters - @abstractmethod def do_op(self, dispatcher, df, name, sidecar=None): """ Base class method to be overridden by each operation. From 3242162d8e0d90f2d2e436a40eaef94986242971 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 5 Dec 2023 09:56:22 +0100 Subject: [PATCH 09/20] add to operation parameter specification --- .../remodeling/operations/convert_columns_op.py | 12 ++++++------ hed/tools/remodeling/operations/remap_columns_op.py | 6 ++++-- hed/tools/remodeling/operations/remove_columns_op.py | 3 ++- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 1fbd39f20..c29501892 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -19,10 +19,14 @@ class ConvertColumnsOp(BaseOp): "type": "object", "properties": { "column_names": { - "type": "array" + "type": "array", + "items": { + "type": "string" + } }, "convert_to": { - "type": "string" + "type": "string", + "enum": ['str', 'int', 'float', 'fixed'], }, "decimal_places": { "type": "integer" @@ -56,10 +60,6 @@ def __init__(self, parameters): self.column_names = parameters['column_names'] self.convert_to = parameters['convert_to'] self.decimal_places = parameters.get('decimal_places', None) - self.allowed_types = ['str', 'int', 'float', 'fixed'] - if self.convert_to not in self.allowed_types: - raise ValueError("CannotConvertToSpecifiedType", - f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}") def do_op(self, dispatcher, df, name, sidecar=None): """ Convert the specified column to a specified type. 
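The enum added above takes over the job of the deleted allowed-types check: an out-of-range convert_to value is now rejected when the operation list is validated rather than when the operation object is constructed. A minimal sketch of that behaviour, assuming the third-party jsonschema package that the new RemodelerValidator appears to build on (the parameter values here are invented for illustration):

    import jsonschema

    from hed.tools.remodeling.operations.convert_columns_op import ConvertColumnsOp

    good = {"column_names": ["response_time"], "convert_to": "float"}
    bad = {"column_names": ["response_time"], "convert_to": "string"}   # not in the enum

    jsonschema.validate(good, ConvertColumnsOp.PARAMS)        # passes, no exception
    try:
        jsonschema.validate(bad, ConvertColumnsOp.PARAMS)
    except jsonschema.ValidationError as error:
        print(error.message)   # roughly: 'string' is not one of ['str', 'int', 'float', 'fixed']

The RemodelerValidator turns raw errors like this one into the friendlier, operation-indexed strings its message templates define.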
diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 34c84edd5..836ac006e 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -50,8 +50,10 @@ class RemapColumnsOp(BaseOp): "string", "number" ] - } - } + }, + "minItems" : 1 + }, + "minItems": 1 }, "ignore_missing": { "type": "boolean" diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 952ce41ab..c1373075e 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -19,7 +19,8 @@ class RemoveColumnsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1 }, "ignore_missing": { "type": "boolean" From 9fd6d53f9bbc934884eee8c734a1ad67dd3121a2 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 5 Dec 2023 09:56:47 +0100 Subject: [PATCH 10/20] finish validator tests --- hed/tools/remodeling/validator.py | 4 ++-- tests/tools/remodeling/test_validator.py | 21 +++++++++++++-------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py index c62c7eccd..baab638e1 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/validator.py @@ -28,7 +28,8 @@ class RemodelerValidator(): "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'." + "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'.", + "enum": "Operation {operation_index}: Operation parameter {parameter_path}, in the {operation_name} operation, contains and unexpected value. Value should be one of {validator_value}." } } @@ -105,7 +106,6 @@ def _parse_message(self, error, operations): that led to the error ''' error_dict = vars(error) - print(error_dict) level = len(error_dict["path"]) if level > 2: diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index c00293261..9a0165fbe 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -10,8 +10,6 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.remodeler_schema_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__.replace('tests', 'hed'))), '../remodeling/resources/remodeler_schema.json')) - with open(os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '../data/remodel_tests/all_remodel_operations.json'))) as f: cls.remodel_file = json.load(f) @@ -88,9 +86,16 @@ def test_validate_parameters(self): self.assertEqual(error_strings[0], "Operation 1: The value of onset_source, response, new_events, in the split_rows operation, should be a array. 
{'key': 'value'} is not a array.") empty_array = [deepcopy(self.remodel_file[0])] - - empty_array_nested = [deepcopy(self.remodel_file[0])] - - - - + empty_array[0]["parameters"]["column_names"] = [] + error_strings = validator.validate(empty_array) + self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should have at least 1 item(s).") + + empty_array_nested = [deepcopy(self.remodel_file[5])] + empty_array_nested[0]["parameters"]["map_list"][0] = [] + error_strings = validator.validate(empty_array_nested) + self.assertEqual(error_strings[0], "Operation 1: The list in item 1, map_list, in the remap_columns operation, should have at least 1 item(s).") + + # invalid_value = [deepcopy(self.remodel_file[18])] + # invalid_value[0]["parameters"]["convert_to"] = "invalid_value" + # error_strings = validator.validate(invalid_value) + # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the convert_columns operation, contains and unexpected value. Value should be one of ['str', 'int', 'float', 'fixed'].") From ffd74fcb31b57160d20ae2c69ff1d6305e9b53c2 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 5 Dec 2023 10:21:44 +0100 Subject: [PATCH 11/20] update docstrings --- hed/tools/remodeling/operations/base_op.py | 6 +++++- .../remodeling/operations/convert_columns_op.py | 13 +------------ hed/tools/remodeling/operations/factor_column_op.py | 4 ++-- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index d6a33b1db..42cd887bb 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -6,7 +6,11 @@ class BaseOp(ABC): """ Base class for operations. All remodeling operations should extend this class.""" def __init__(self, parameters): - """""" + """ Constructor for the BaseOp class. Should be extended by operations. + + Parameters: + parameters (dict): A dictionary specifying the appropriate parameters for the operation. + """ self.parameters = parameters @property diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index c29501892..8c9ac485e 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -4,14 +4,13 @@ class ConvertColumnsOp(BaseOp): - """ Convert. + """ Convert data type in column Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. - **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) - **decimal_places** (*int*): Number decimal places to keep (for fixed only). - """ NAME = "convert_columns" @@ -45,16 +44,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - - :raises ValueError: - - If convert_to is not one of the allowed values. 
- """ super().__init__(self.PARAMS, parameters) self.column_names = parameters['column_names'] diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index af08db511..cc6ee476b 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -14,7 +14,6 @@ class FactorColumnOp(BaseOp): - **factor_values** (*list*): Values in the column column_name to create factors for. - **factor_names** (*list*): Names to use as the factor columns. - """ NAME = "factor_column" @@ -34,7 +33,8 @@ class FactorColumnOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1 } }, "required": [ From e1055ea44d37c332f7244b5e72cb0c7cdfd08774 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 5 Dec 2023 11:52:17 +0100 Subject: [PATCH 12/20] update doc + specification --- .../operations/convert_columns_op.py | 2 ++ .../remodeling/operations/factor_column_op.py | 7 ------ .../operations/factor_hed_tags_op.py | 18 +++++---------- .../operations/factor_hed_type_op.py | 10 --------- .../operations/merge_consecutive_op.py | 8 ------- .../remodeling/operations/remap_columns_op.py | 22 ++++--------------- .../operations/remove_columns_op.py | 7 ------ .../remodeling/operations/remove_rows_op.py | 7 ------ .../operations/rename_columns_op.py | 7 ------ .../operations/reorder_columns_op.py | 7 ------ .../remodeling/operations/split_rows_op.py | 12 +++------- .../operations/summarize_column_names_op.py | 7 ------ .../operations/summarize_column_values_op.py | 7 ------ .../operations/summarize_definitions_op.py | 6 ----- .../operations/summarize_hed_tags_op.py | 14 +++++------- .../operations/summarize_hed_type_op.py | 12 ++++------ .../operations/summarize_hed_validation_op.py | 10 +++------ .../summarize_sidecar_from_events_op.py | 7 ------ 18 files changed, 27 insertions(+), 143 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 8c9ac485e..c1528c9e1 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -9,6 +9,8 @@ class ConvertColumnsOp(BaseOp): Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. - **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) + + Optional remodeling parameters: - **decimal_places** (*int*): Number decimal places to keep (for fixed only). """ diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index cc6ee476b..31d55d044 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -51,13 +51,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - :raises ValueError: - If factor_names is not empty and is not the same length as factor_values. 
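With the :raises sections removed, malformed parameters are meant to surface through the validator rather than through constructor exceptions. A short sketch of that flow using the RemodelerValidator added earlier in this series (the operation list below is invented for illustration and is not one of the test fixtures):

    from hed.tools.remodeling.validator import RemodelerValidator

    operations = [{
        "operation": "factor_column",
        "description": "Illustrative operation with required parameters left out.",
        "parameters": {"column_name": "trial_type"}    # factor_values and factor_names omitted
    }]

    validator = RemodelerValidator()
    error_strings = validator.validate(operations)     # readable messages; falsy when the list passes
    for error in error_strings:
        print(error)    # e.g. "Operation 1: The field factor_values is missing in ..."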
diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index e670172d2..d83488189 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -17,8 +17,10 @@ class FactorHedTagsOp(BaseOp): Required remodeling parameters: - **queries** (*list*): Queries to be applied successively as filters. - **query_names** (*list*): Column names for the query factors. - - **remove_types** (*list*): Structural HED tags to be removed. - - **expand_context** (*bool*): Expand the context if True. + - **remove_types** (*list*): Structural HED tags to be removed. + + Optional remodeling parameters: + - **expand_context** (*bool*): Expand the context if True. Notes: - If factor column names are not provided, *query1*, *query2*, ... are used. @@ -35,7 +37,8 @@ class FactorHedTagsOp(BaseOp): "items": { "type": "string" }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True }, "query_names": { "type": "array", @@ -69,17 +72,8 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - :raises ValueError: - - If the specification is missing a valid operation. - If the length of query names is not empty and not same length as queries. - - If there are duplicate query names. """ super().__init__(self.PARAMS, parameters) diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index a2df3e446..ea81f9762 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -43,16 +43,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - - :raises ValueError: - - If the specification is missing a valid operation. - """ super().__init__(self.PARAMS, parameters) self.type_tag = parameters["type_tag"] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 082bda981..720826559 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -58,15 +58,7 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - :raises ValueError: - - If the specification is missing a valid operation. - If one of the match column is the merge column. 
""" diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 836ac006e..b12ca3284 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -33,13 +33,15 @@ class RemapColumnsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1 }, "destination_columns": { "type": "array", "items": { "type": "string" - } + }, + "minItems": 1 }, "map_list": { "type": "array", @@ -80,18 +82,9 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - :raises ValueError: - If an integer column is not a key column. - If a column designated as an integer source does not have valid integers. - - If no source columns are specified. - - If no destination columns are specified. - If a map_list entry has the wrong number of items (source columns + destination columns). """ @@ -109,13 +102,6 @@ def __init__(self, parameters): self.destination_columns = parameters['destination_columns'] self.map_list = parameters['map_list'] self.ignore_missing = parameters['ignore_missing'] - if len(self.source_columns) < 1: - raise ValueError("EmptySourceColumns", - f"The source column list {str(self.source_columns)} must be non-empty") - - if len(self.destination_columns) < 1: - raise ValueError("EmptyDestinationColumns", - f"The destination column list {str(self.destination_columns)} must be non-empty") entry_len = len(self.source_columns) + len(self.destination_columns) for index, item in enumerate(self.map_list): if len(item) != entry_len: diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index da44fcf3c..2832d06a4 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -39,13 +39,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) self.column_names = parameters['column_names'] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 9a8018e67..c849e0279 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -42,13 +42,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- """ super().__init__(self.PARAMS, parameters) self.column_name = parameters["column_name"] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 5f927213c..4a22358cd 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -42,13 +42,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__("rename_columns", parameters) self.column_mapping = parameters['column_mapping'] diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 09f2116e2..18de0cba1 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -43,13 +43,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) self.column_order = parameters['column_order'] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 0190cc543..fd0773ba8 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -68,9 +68,10 @@ class SplitRowsOp(BaseOp): } }, "required": [ - "remove_parent_event" + "remove_parent_event", + "new_events", + "anchor_columns" ], - "minProperties": 2, "additionalProperties": False } @@ -80,13 +81,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) self.anchor_column = parameters['anchor_column'] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index b392d1a72..602139269 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -45,13 +45,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 9f691fdab..4993c50a4 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -72,13 +72,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 7f1e54572..43a73180f 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -46,12 +46,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index c794599b8..33e8cddb4 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -18,7 +18,10 @@ class SummarizeHedTagsOp(BaseOp): - **tags** (*dict*): Specifies how to organize the tag output. Optional remodeling parameters: - - **expand_context** (*bool*): If True, include counts from expanded context (not supported). + - **append_timecode** (*bool*): If True, . + - **include_context** (*bool*): If True, . + - **replace_defs** (*bool*): If True, . + - **remove_types** (*bool*): If True, . The purpose of this op is to produce a summary of the occurrences of hed tags organized in a specified manner. The @@ -96,14 +99,7 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - + """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 1811beb7a..6407f604d 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -15,7 +15,10 @@ class SummarizeHedTypeOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - **summary_filename** (*str*): Base filename of the summary. - - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). + - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). 
+ + Optional remodeling parameters: + - **append_timecode** (*bool*): If true, The purpose of this op is to produce a summary of the occurrences of specified tag. This summary is often used with `condition-variable` to produce a summary of the experimental design. @@ -56,13 +59,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 49557f462..5c57c8b3b 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -16,6 +16,9 @@ class SummarizeHedValidationOp(BaseOp): - **summary_filename** (*str*): Base filename of the summary. - **check_for_warnings** (*bool*): If true include warnings as well as errors. + Optional remodeling parameters: + - **append_timecode** (*bool*): If true, + The purpose of this op is to produce a summary of the HED validation errors in a file. """ @@ -52,13 +55,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. - """ super().__init__(self.PARAMS, parameters) self.summary_name = parameters['summary_name'] diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index c5549d6b5..6ad414822 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -62,13 +62,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Dictionary with the parameter values for required and optional parameters. - :raises KeyError: - - If a required parameter is missing. - - If an unexpected parameter is provided. - - :raises TypeError: - - If a parameter has the wrong type. 
- """ super().__init__(self.PARAMS, parameters) From ff79911f04603aa1bb2ba2d9a0f175b829ed2b2d Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 12 Dec 2023 13:43:10 +0100 Subject: [PATCH 13/20] update docs and json schema specification of operations --- .../operations/convert_columns_op.py | 10 +++- .../remodeling/operations/factor_column_op.py | 29 +++++++----- .../operations/factor_hed_tags_op.py | 25 +++++----- .../operations/factor_hed_type_op.py | 16 +++++-- .../operations/merge_consecutive_op.py | 7 +-- .../remodeling/operations/remap_columns_op.py | 10 +++- .../remodeling/operations/remove_rows_op.py | 3 +- .../operations/rename_columns_op.py | 2 +- .../operations/reorder_columns_op.py | 4 +- .../remodeling/operations/split_rows_op.py | 21 +++++---- .../operations/summarize_column_names_op.py | 7 ++- .../operations/summarize_column_values_op.py | 36 ++++++++------- .../operations/summarize_definitions_op.py | 5 +- .../operations/summarize_hed_tags_op.py | 46 +++++++------------ .../operations/summarize_hed_type_op.py | 2 +- .../operations/summarize_hed_validation_op.py | 8 +++- .../summarize_sidecar_from_events_op.py | 17 ++++--- 17 files changed, 143 insertions(+), 105 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index c1528c9e1..ed87ec899 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -1,4 +1,6 @@ """ Convert the type of the specified columns of a tabular file. """ +#TODO finish implementation +#TODO Specify when requirements for decimal_places parameter from hed.tools.remodeling.operations.base_op import BaseOp @@ -8,11 +10,13 @@ class ConvertColumnsOp(BaseOp): Required remodeling parameters: - **column_names** (*list*): The list of columns to convert. - - **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) + - **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.) Optional remodeling parameters: - **decimal_places** (*int*): Number decimal places to keep (for fixed only). + Notes: + - Decimal places requirements not implemented """ NAME = "convert_columns" @@ -23,7 +27,9 @@ class ConvertColumnsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, "convert_to": { "type": "string", diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 31d55d044..ca67ab286 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -4,43 +4,50 @@ # TODO: Does not handle empty factor names. # TODO: Does not handle optional return columns. +# TODO: Put dependency factornames requires factorvalues in +# TODO: Same length factornames and factorvalues class FactorColumnOp(BaseOp): """ Create tabular file factor columns from column values. Required remodeling parameters: - - **column_name** (*str*): The name of a column in the DataFrame. - - **factor_values** (*list*): Values in the column column_name to create factors for. - - **factor_names** (*list*): Names to use as the factor columns. + - **column_name** (*str*): The name of a column in the DataFrame. + + Optional remodeling parameters + - **factor_names** (*list*): Names to use as the factor columns. + - **factor_values** (*list*): Values in the column column_name to create factors for. 
""" NAME = "factor_column" - + PARAMS = { "type": "object", "properties": { "column_name": { "type": "string" }, - "factor_values": { + "factor_names": { "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, - "factor_names": { + "factor_values": { "type": "array", "items": { - "type": "string" + "type": "string" }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True } }, "required": [ "column_name", - "factor_values", - "factor_names" + "factor_names", + "factor_values" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index d83488189..544fb682c 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -15,17 +15,16 @@ class FactorHedTagsOp(BaseOp): """ Create tabular file factors from tag queries. Required remodeling parameters: - - **queries** (*list*): Queries to be applied successively as filters. - - **query_names** (*list*): Column names for the query factors. - - **remove_types** (*list*): Structural HED tags to be removed. + - **queries** (*list*): Queries to be applied successively as filters. Optional remodeling parameters: - **expand_context** (*bool*): Expand the context if True. + - **query_names** (*list*): Column names for the query factors. + - **remove_types** (*list*): Structural HED tags to be removed. Notes: - - If factor column names are not provided, *query1*, *query2*, ... are used. + - If query names are not provided, *query1*, *query2*, ... are used. - When the context is expanded, the effect of events for temporal extent is accounted for. - - Context expansion is not implemented in the current version. """ NAME = "factor_hed_tags" @@ -40,28 +39,28 @@ class FactorHedTagsOp(BaseOp): "minItems": 1, "uniqueItems": True }, + "expand_context": { + "type": "boolean" + }, "query_names": { "type": "array", "items": { "type": "string" }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True }, "remove_types": { "type": "array", "items": { "type": "string" }, - "minItems": 0 - }, - "expand_context": { - "type": "boolean" + "minItems": 1, + "uniqueItems": True } }, "required": [ - "queries", - "query_names", - "remove_types" + "queries" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index ea81f9762..097285ee0 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -14,8 +14,10 @@ class FactorHedTypeOp(BaseOp): """ Create tabular file factors from type variables and append to tabular data. Required remodeling parameters: - - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`). - - **type_values** (*list*): Factor values to include. If empty all values of that type_tag are used. + - **type_tag** (*str*): HED tag used to find the factors (most commonly `condition-variable`). + + Optional remodeling parameters: + - **type_values** (*list*): If provided, specifies which factor values to include. 
""" NAME = "factor_hed_type" @@ -27,12 +29,16 @@ class FactorHedTypeOp(BaseOp): "type": "string" }, "type_values": { - "type": "array" + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "uniqueItems": True } }, "required": [ - "type_tag", - "type_values" + "type_tag" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 720826559..7a8787b9b 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -10,9 +10,11 @@ class MergeConsecutiveOp(BaseOp): Required remodeling parameters: - **column_name** (*str*): name of column whose consecutive values are to be compared (the merge column). - **event_code** (*str* or *int* or *float*): the particular value in the match column to be merged. - - **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same. - **set_durations** (*bool*): If true, set the duration of the merged event to the extent of the merged events. - - **ignore_missing** (*bool*): If true, missing match_columns are ignored. + - **ignore_missing** (*bool*): If true, missing match_columns are ignored. + + Optional remodeling parameters: + - **match_columns** (*list*): A list of columns whose values have to be matched for two events to be the same. """ NAME = "merge_consecutive" @@ -45,7 +47,6 @@ class MergeConsecutiveOp(BaseOp): "required": [ "column_name", "event_code", - "match_columns", "set_durations", "ignore_missing" ], diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index b12ca3284..4e6034e10 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -83,8 +83,7 @@ def __init__(self, parameters): parameters (dict): Parameter values for required and optional parameters. :raises ValueError: - - If an integer column is not a key column. - - If a column designated as an integer source does not have valid integers. + - If an integer column is not a source column. - If a map_list entry has the wrong number of items (source columns + destination columns). """ @@ -111,6 +110,13 @@ def __init__(self, parameters): self.key_map = self._make_key_map() def _make_key_map(self): + """ + + :raises ValueError: + - If a column designated as an integer source does not have valid integers. 
+ + """ + key_df = pd.DataFrame( self.map_list, columns=self.source_columns+self.destination_columns) key_map = KeyMap(self.source_columns, diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index c849e0279..5f81bd762 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -26,7 +26,8 @@ class RemoveRowsOp(BaseOp): "string", "number" ] - } + }, + "minItems": 1 } }, "required": [ diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 4a22358cd..9aa33eea6 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -19,7 +19,7 @@ class RenameColumnsOp (BaseOp): "column_mapping": { "type": "object", "patternProperties": { - "(.*?)": { + ".*": { "type": "string" } }, diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 18de0cba1..996349830 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -20,7 +20,9 @@ class ReorderColumnsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, "ignore_missing": { "type": "boolean" diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index fd0773ba8..e496fe28c 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -25,7 +25,7 @@ class SplitRowsOp(BaseOp): "new_events": { "type": "object", "patternProperties": { - "^[a-zA-Z0-9_]*$": { + ".*": { "type": "object", "properties": { "onset_source": { @@ -35,7 +35,8 @@ class SplitRowsOp(BaseOp): "string", "number" ] - } + }, + "minItems": 1 }, "duration": { "type": "array", @@ -44,33 +45,35 @@ class SplitRowsOp(BaseOp): "string", "number" ] - } + }, + "minItems": 1 }, "copy_columns": { "type": "array", "items": { "type": "string" } - } + }, + "minItems": 1, + "uniqueItems": True }, "required": [ "onset_source", - "duration", - "copy_columns" + "duration" ], "additionalProperties": False } }, "minProperties": 1 }, - "remove_parent_event": { + "remove_parent_row": { "type": "boolean" } }, "required": [ - "remove_parent_event", "new_events", - "anchor_columns" + "anchor_columns", + "remove_parent_row" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 602139269..394f2129a 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -9,8 +9,11 @@ class SummarizeColumnNamesOp(BaseOp): """ Summarize the column names in a collection of tabular files. Required remodeling parameters: - - **summary_name** (*str*) The name of the summary. - - **summary_filename** (*str*) Base filename of the summary. + - **summary_name** (*str*): The name of the summary. + - **summary_filename** (*str*): Base filename of the summary. + + Optional remodeling parameters: + - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. The purpose is to check that all the tabular files have the same columns in same order. 
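A note on the two schema constructs being added throughout these PARAMS blocks: "minItems"/"uniqueItems" reject empty or duplicated lists, and "patternProperties" validates the values of free-form mappings (switching the key pattern from "(.*?)" to ".*" changes nothing functionally, since JSON Schema applies the regex as an unanchored search and both match every key). The fragment below is illustrative only, not one of the actual PARAMS blocks, and can be run directly against jsonschema:

    # Illustrative fragment, not one of the actual PARAMS blocks: it combines
    # a non-empty, duplicate-free string array with a mapping whose keys are
    # free-form but whose values must be strings.
    from jsonschema import Draft202012Validator

    fragment = {
        "type": "object",
        "properties": {
            "column_names": {
                "type": "array",
                "items": {"type": "string"},
                "minItems": 1,
                "uniqueItems": True
            },
            "column_mapping": {
                "type": "object",
                "patternProperties": {".*": {"type": "string"}}
            }
        }
    }

    validator = Draft202012Validator(fragment)
    print(validator.is_valid({"column_names": ["onset", "onset"]}))   # False, duplicate item
    print(validator.is_valid({"column_names": []}))                   # False, empty list
    print(validator.is_valid({"column_mapping": {"stim_file": 3}}))   # False, value not a string
    print(validator.is_valid({"column_names": ["onset", "duration"],
                              "column_mapping": {"stim_file": "stimulus"}}))   # True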
diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 4993c50a4..2380e47ea 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -10,12 +10,14 @@ class SummarizeColumnValuesOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. - - **skip_columns** (*list*): Names of columns to skip in the summary. - - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. + - **summary_filename** (*str*): Base filename of the summary. - Optional remodeling parameters: - - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. + Optional remodeling parameters: + - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. + - **max_categorical** (*int*): Maximum number of unique values to include in summary for a categorical column. + - **skip_columns** (*list*): Names of columns to skip in the summary. + - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. + - **values_per_line** (*int*): The number of values output per line in the summary. The purpose is to produce a summary of the values in a tabular file. @@ -31,23 +33,27 @@ class SummarizeColumnValuesOp(BaseOp): "summary_filename": { "type": "string" }, + "append_timecode": { + "type": "boolean" + }, + "max_categorical": { + "type": "integer" + }, "skip_columns": { "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, "value_columns": { "type": "array", "items": { "type": "string" - } - }, - "append_timecode": { - "type": "boolean" - }, - "max_categorical": { - "type": "integer" + }, + "minItems": 1, + "uniqueItems": True }, "values_per_line": { "type": "integer" @@ -55,9 +61,7 @@ class SummarizeColumnValuesOp(BaseOp): }, "required": [ "summary_name", - "summary_filename", - "skip_columns", - "value_columns" + "summary_filename" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 43a73180f..9f66a05a9 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -11,7 +11,10 @@ class SummarizeDefinitionsOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. + - **summary_filename** (*str*): Base filename of the summary. + + Optional remodeling parameters: + - **append_timecode** (*bool*): If false (default), the timecode is not appended to the base filename when summary is saved, otherwise it is. The purpose is to produce a summary of the values in a tabular file. diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 33e8cddb4..d6e0c8040 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -18,10 +18,10 @@ class SummarizeHedTagsOp(BaseOp): - **tags** (*dict*): Specifies how to organize the tag output. 
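Because skip_columns and value_columns are now optional for summarize_column_values, a bare-bones operation is enough to satisfy the schema. The snippet below is a hypothetical example (summary names invented), with the optional parameters noted in the comment rather than spelled out:

    # Hypothetical minimal operation: only the required parameters appear;
    # append_timecode, max_categorical, skip_columns, value_columns and
    # values_per_line may be added when the defaults need overriding.
    minimal_column_values = {
        "operation": "summarize_column_values",
        "description": "Summarize the column values of the task events.",
        "parameters": {
            "summary_name": "column_values_summary",
            "summary_filename": "column_values_summary"
        }
    }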
Optional remodeling parameters: - - **append_timecode** (*bool*): If True, . - - **include_context** (*bool*): If True, . - - **replace_defs** (*bool*): If True, . - - **remove_types** (*bool*): If True, . + - **append_timecode** (*bool*): If True, the timecode is appended to the base filename when summary is saved. + - **include_context** (*bool*): If True, context of events is included in summary. + - **remove_types** (*list*): A list of type tags, such as Condition-variable or Task, to be excluded from the summary. + - **replace_defs** (*bool*): If True, the def tag is replaced by the contents of the definitions. The purpose of this op is to produce a summary of the occurrences of hed tags organized in a specified manner. The @@ -41,32 +41,18 @@ class SummarizeHedTagsOp(BaseOp): }, "tags": { "type": "object", - "properties": { - "Sensory events": { + "patternProperties": { + ".*": { "type": "array", "items": { "type": "string" - } - }, - "Agent actions": { - "type": "array", - "items": { - "type": "string" - } - }, - "Objects": { - "type": "array", - "items": { - "type": "string" - } - } + }, + "minItems": 1, + "uniqueItems": True }, - "required": [ - "Sensory events", - "Agent actions", - "Objects" - ], + "minProperties": 1, "additionalProperties": False + } }, "append_timecode": { "type": "boolean" @@ -74,14 +60,16 @@ class SummarizeHedTagsOp(BaseOp): "include_context": { "type": "boolean" }, - "replace_defs": { - "type": "boolean" - }, "remove_types": { "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True + }, + "replace_defs": { + "type": "boolean" } }, "required": [ diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 6407f604d..4b0341fcf 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -18,7 +18,7 @@ class SummarizeHedTypeOp(BaseOp): - **type_tag** (*str*):Type tag to get_summary (e.g. `condition-variable` or `task` tags). Optional remodeling parameters: - - **append_timecode** (*bool*): If true, + - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when summary is saved The purpose of this op is to produce a summary of the occurrences of specified tag. This summary is often used with `condition-variable` to produce a summary of the experimental design. diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index 5c57c8b3b..e3fb43ea8 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -17,7 +17,7 @@ class SummarizeHedValidationOp(BaseOp): - **check_for_warnings** (*bool*): If true include warnings as well as errors. Optional remodeling parameters: - - **append_timecode** (*bool*): If true, + - **append_timecode** (*bool*): If true, the timecode is appended to the base filename when summary is saved. The purpose of this op is to produce a summary of the HED validation errors in a file. 
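With the patternProperties form of "tags" above, the groupings are no longer pinned to "Sensory events", "Agent actions", and "Objects"; any non-empty set of group names, each mapping to a non-empty list of unique tags, should validate. A hypothetical example (group names and tag lists invented), assuming summary_name, summary_filename, and tags remain the required parameters:

    # Hypothetical operation: the group names and tag lists are invented.
    summarize_custom_groups = {
        "operation": "summarize_hed_tags",
        "description": "Summarize tags using study-specific groupings.",
        "parameters": {
            "summary_name": "custom_tag_summary",
            "summary_filename": "custom_tag_summary",
            "tags": {
                "Stimuli": ["Sensory-event", "Visual-presentation"],
                "Responses": ["Agent-action"]
            }
        }
    }

    # With the patched package installed, this could be checked directly:
    # from hed.tools.remodeling.validator import RemodelerValidator
    # print(RemodelerValidator().validate([summarize_custom_groups]))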
@@ -38,11 +38,15 @@ class SummarizeHedValidationOp(BaseOp): }, "check_for_warnings": { "type": "boolean" + }, + "append_timecode": { + "type": "boolean" } }, "required": [ "summary_name", - "summary_filename" + "summary_filename", + "check_for_warnings" ], "additionalProperties": False } diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 6ad414822..3425da569 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -11,7 +11,10 @@ class SummarizeSidecarFromEventsOp(BaseOp): Required remodeling parameters: - **summary_name** (*str*): The name of the summary. - - **summary_filename** (*str*): Base filename of the summary. + - **summary_filename** (*str*): Base filename of the summary. + + Optional remodeling parameters: + - **append_timecode** (*bool*): - **skip_columns** (*list*): Names of columns to skip in the summary. - **value_columns** (*list*): Names of columns to treat as value columns rather than categorical columns. @@ -33,13 +36,17 @@ class SummarizeSidecarFromEventsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, "value_columns": { "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True }, "append_timecode": { "type": "boolean" @@ -47,9 +54,7 @@ class SummarizeSidecarFromEventsOp(BaseOp): }, "required": [ "summary_name", - "summary_filename", - "skip_columns", - "value_columns" + "summary_filename" ], "additionalProperties": False } From 63a168b6cec1f18f43a9d37f049916a1939de17d Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Tue, 12 Dec 2023 13:44:07 +0100 Subject: [PATCH 14/20] make compiled schema accessible in validator --- hed/tools/remodeling/validator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py index baab638e1..7fd77a424 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/validator.py @@ -84,7 +84,8 @@ class RemodelerValidator(): } def __init__(self): - self.validator = Draft7Validator(self._construct_schema()) + self.schema = self._construct_schema() + self.validator = Draft7Validator(self.schema) def validate(self, operations): list_of_error_strings = [] From 07814d79c865a064e7e306044e43cdd4b9edbb44 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 18 Dec 2023 13:54:32 +0100 Subject: [PATCH 15/20] add dependency error message --- hed/tools/remodeling/validator.py | 43 ++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py index 7fd77a424..3b8427e6d 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/validator.py @@ -1,12 +1,13 @@ import os import json from copy import deepcopy -from jsonschema import Draft7Validator +from jsonschema import Draft202012Validator from jsonschema.exceptions import ErrorTree from hed.tools.remodeling.operations.valid_operations import valid_operations class RemodelerValidator(): + """Validator for remodeler input files.""" MESSAGE_STRINGS = { "0": { @@ -22,15 +23,17 @@ class RemodelerValidator(): "type": "Operation {operation_index}: {instance} is not a {validator_value}. {operation_field} should be of type {validator_value}.", "enum": "{instance} is not a known remodeler operation. 
Accepted remodeler operations can be found in the documentation.", "required": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name}.", - "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'." + "additionalProperties": "Operation {operation_index}: Operation parameters for {operation_name} contain an unexpected field '{added_property}'.", + "dependentRequired": "Operation {operation_index}: The parameter {missing_value} is missing. {missing_value} is a required parameter of {operation_name} when {dependent_on} is specified." }, "more": { "type": "Operation {operation_index}: The value of {parameter_path}, in the {operation_name} operation, should be a {validator_value}. {instance} is not a {validator_value}.", "minItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} item(s).", "required": "Operation {operation_index}: The field {missing_value} is missing in {parameter_path}. {missing_value} is a required parameter of {parameter_path}.", "additionalProperties": "Operation {operation_index}: Operation parameters for {parameter_path} contain an unexpected field '{added_property}'.", - "enum": "Operation {operation_index}: Operation parameter {parameter_path}, in the {operation_name} operation, contains and unexpected value. Value should be one of {validator_value}." - + "enum": "Operation {operation_index}: Operation parameter {parameter_path}, in the {operation_name} operation, contains and unexpected value. Value should be one of {validator_value}.", + "uniqueItems": "Operation {operation_index}: The list in {parameter_path}, in the {operation_name} operation, should only contain unique items.", + "minProperties": "Operation {operation_index}: The dictionary in {parameter_path}, in the {operation_name} operation, should have at least {validator_value} key(s)." 
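These templates are filled in from the attributes of the jsonschema ValidationError. The sketch below illustrates the idea with a much smaller schema and made-up wording; it is not the validator's actual code:

    # Simplified sketch: take the operation index from the error's instance
    # path and pick a template keyed on the failing schema keyword.
    from jsonschema import Draft202012Validator

    schema = {"type": "array",
              "items": {"type": "object",
                        "required": ["operation", "description", "parameters"]}}
    operations = [{"operation": "remove_columns", "description": "Drop columns."}]

    templates = {"required": "Operation {index}: {message}."}

    for error in Draft202012Validator(schema).iter_errors(operations):
        index = list(error.path)[0] + 1 if error.path else 0
        print(templates[error.validator].format(index=index, message=error.message))
        # prints: Operation 1: 'parameters' is a required property.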
} } @@ -84,10 +87,25 @@ class RemodelerValidator(): } def __init__(self): + """ Constructor for remodeler Validator + + Parameters: + - **schema** (*dict*): The compiled json schema against which remodeler files should be validated + - **validator** (*Draft202012Validator*): The instantiated json schema validator + """ self.schema = self._construct_schema() - self.validator = Draft7Validator(self.schema) + self.validator = Draft202012Validator(self.schema) def validate(self, operations): + """ Validates a dictionary against the json schema specification for the remodeler file and returns the a list of user friendly error messages + + Parameters: + **operations** (*dict*): Dictionary with input operations to run through the remodeler + + Returns: + **list_of_error_strings** (*list*): List with all error messages for every error identified by the validator + """ + list_of_error_strings = [] for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path): list_of_error_strings.append( @@ -97,14 +115,15 @@ def validate(self, operations): def _parse_message(self, error, operations): ''' Return a user friendly error message based on the jsonschema validation error - args: - - errors: a list of errors returned from the validator - - operations: the operations that were put in + Parameters: + - **error** (*ValidationError*): A validation error from jsonschema validator + - **operations** (*dict*): The operations that were validated Note: - json schema error does not contain all necessary information to return a proper error message so we also take some information directly from the operations that led to the error + - all necessary information is gathered into an error dict, message strings are predefined in a dictionary which are formatted with additional information ''' error_dict = vars(error) @@ -140,6 +159,12 @@ def _parse_message(self, error, operations): if type == 'additionalProperties': error_dict["added_property"] = error_dict["message"].split("'")[ 1::2][0] + + # dependent required provided both the missing value and the reason it is required in one dictionary + # it is split over two for the error message + if type == 'dependentRequired': + error_dict["missing_value"] = list(error_dict["validator_value"].keys())[0] + error_dict["dependent_on"] = list(error_dict["validator_value"].values())[0] return self.MESSAGE_STRINGS[str(level)][type].format(**error_dict) @@ -154,9 +179,7 @@ def _construct_schema(self): parameter_specification = deepcopy(self.PARAMETER_SPECIFICATION_TEMPLATE) parameter_specification["if"]["properties"]["operation"]["const"] = operation[0] parameter_specification["then"]["properties"]["parameters"] = operation[1].PARAMS - #print(parameter_specification) schema["items"]["allOf"].append(deepcopy(parameter_specification)) - #print(schema) return schema From 266e984744c03f53a6f8514458714998bfa5c859 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 18 Dec 2023 14:00:43 +0100 Subject: [PATCH 16/20] add dependency checks --- .../operations/convert_columns_op.py | 10 ++++++++- .../remodeling/operations/factor_column_op.py | 8 +++---- .../remodel_tests/all_remodel_operations.json | 10 ++------- tests/tools/remodeling/test_validator.py | 22 +++++++++++++++++-- 4 files changed, 35 insertions(+), 15 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index ed87ec899..c089420d7 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ 
b/hed/tools/remodeling/operations/convert_columns_op.py @@ -43,7 +43,15 @@ class ConvertColumnsOp(BaseOp): "column_names", "convert_to" ], - "additionalProperties": False + "additionalProperties": False, + "if": { + "properties": { + "convert_to": {"const": "fixed"} + } + }, + "then": { + "required": ["decimal_places"] + } } def __init__(self, parameters): diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index ca67ab286..207d7ad3c 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -4,7 +4,6 @@ # TODO: Does not handle empty factor names. # TODO: Does not handle optional return columns. -# TODO: Put dependency factornames requires factorvalues in # TODO: Same length factornames and factorvalues @@ -45,10 +44,11 @@ class FactorColumnOp(BaseOp): } }, "required": [ - "column_name", - "factor_names", - "factor_values" + "column_name" ], + "dependentRequired": { + "factor_names": ["factor_values"] + }, "additionalProperties": False } diff --git a/tests/data/remodel_tests/all_remodel_operations.json b/tests/data/remodel_tests/all_remodel_operations.json index 29ec075ba..34e929f95 100644 --- a/tests/data/remodel_tests/all_remodel_operations.json +++ b/tests/data/remodel_tests/all_remodel_operations.json @@ -37,7 +37,6 @@ "correct", "incorrect" ], - "remove_types": [], "expand_context": false } }, @@ -45,8 +44,7 @@ "operation": "factor_hed_type", "description": "Factor based on the sex of the images being presented.", "parameters": { - "type_tag": "Condition-variable", - "type_values": [] + "type_tag": "Condition-variable" } }, { @@ -185,7 +183,7 @@ ] } }, - "remove_parent_event": false + "remove_parent_row": false } }, { @@ -271,10 +269,6 @@ "parameters": { "summary_name": "AOMIC_generate_sidecar", "summary_filename": "AOMIC_generate_sidecar", - "skip_columns": [ - "onset", - "duration" - ], "value_columns": [ "response_time", "stop_signal_delay" diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 9a0165fbe..90dfb5898 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -3,8 +3,6 @@ import unittest from copy import deepcopy from hed.tools.remodeling.validator import RemodelerValidator -from jsonschema import Draft7Validator -from jsonschema.exceptions import SchemaError class Test(unittest.TestCase): @@ -99,3 +97,23 @@ def test_validate_parameters(self): # invalid_value[0]["parameters"]["convert_to"] = "invalid_value" # error_strings = validator.validate(invalid_value) # self.assertEqual(error_strings[0], "Operation 1: Operation parameter convert_to, in the convert_columns operation, contains and unexpected value. Value should be one of ['str', 'int', 'float', 'fixed'].") + + # value_dependency = [deepcopy(self.remodel_file[18])] + # value_dependency[0]["parameters"]["convert_to"] = "fixed" + # error_strings = validator.validate(value_dependency) + # self.assertEqual(error_strings[0], "Operation 1: The parameter decimal_places is missing. decimal_places is a required parameter of convert_columns.") + + property_dependency = [deepcopy(self.remodel_file[1])] + del property_dependency[0]["parameters"]["factor_values"] + error_strings = validator.validate(property_dependency) + self.assertEqual(error_strings[0], "Operation 1: The parameter factor_names is missing. 
factor_names is a required parameter of factor_column when ['factor_values'] is specified.") + + double_item_in_array = [deepcopy(self.remodel_file[0])] + double_item_in_array[0]["parameters"]["column_names"] = ['response', 'response'] + error_strings = validator.validate(double_item_in_array) + self.assertEqual(error_strings[0], "Operation 1: The list in column_names, in the remove_columns operation, should only contain unique items.") + + double_item_in_array_nested = [deepcopy(self.remodel_file[10])] + double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ['response', 'response'] + error_strings = validator.validate(double_item_in_array_nested) + self.assertEqual(error_strings[0], "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.") From ad7573381469e2c3fc369ab764b6810bb54e8252 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Mon, 18 Dec 2023 14:01:46 +0100 Subject: [PATCH 17/20] correct some parameter specifications --- hed/tools/remodeling/operations/remap_columns_op.py | 7 +++++-- hed/tools/remodeling/operations/remove_columns_op.py | 3 ++- hed/tools/remodeling/operations/remove_rows_op.py | 3 ++- hed/tools/remodeling/operations/split_rows_op.py | 12 ++++++------ 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 4e6034e10..58b9fb437 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -55,7 +55,8 @@ class RemapColumnsOp(BaseOp): }, "minItems" : 1 }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True }, "ignore_missing": { "type": "boolean" @@ -64,7 +65,9 @@ class RemapColumnsOp(BaseOp): "type": "array", "items": { "type": "string" - } + }, + "minItems": 1, + "uniqueItems": True } }, "required": [ diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 2832d06a4..a81fbd50c 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -20,7 +20,8 @@ class RemoveColumnsOp(BaseOp): "items": { "type": "string" }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True }, "ignore_missing": { "type": "boolean" diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 5f81bd762..6eb0e2daa 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -27,7 +27,8 @@ class RemoveRowsOp(BaseOp): "number" ] }, - "minItems": 1 + "minItems": 1, + "uniqueItems": True } }, "required": [ diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index e496fe28c..45d86da28 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -15,7 +15,7 @@ class SplitRowsOp(BaseOp): """ NAME = "split_rows" - + PARAMS = { "type": "object", "properties": { @@ -52,10 +52,10 @@ class SplitRowsOp(BaseOp): "type": "array", "items": { "type": "string" - } - }, - "minItems": 1, - "uniqueItems": True + }, + "minItems": 1, + "uniqueItems": True + } }, "required": [ "onset_source", @@ -71,8 +71,8 @@ class SplitRowsOp(BaseOp): } }, "required": [ + "anchor_column", "new_events", - "anchor_columns", "remove_parent_row" ], "additionalProperties": 
False From 3df10acaff0b382c8fd46d8aeaea34066da03946 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Fri, 5 Jan 2024 14:46:41 +0100 Subject: [PATCH 18/20] remodel schema additional validation of input data --- hed/tools/remodeling/operations/base_op.py | 10 ++++++ .../operations/convert_columns_op.py | 9 ++--- .../remodeling/operations/factor_column_op.py | 13 ++++++-- .../operations/factor_hed_tags_op.py | 11 +++++-- .../operations/factor_hed_type_op.py | 4 +++ .../operations/merge_consecutive_op.py | 14 ++++---- .../remodeling/operations/number_groups_op.py | 4 +++ .../remodeling/operations/number_rows_op.py | 4 +++ .../remodeling/operations/remap_columns_op.py | 26 +++++++-------- .../operations/remove_columns_op.py | 4 +++ .../remodeling/operations/remove_rows_op.py | 4 +++ .../operations/rename_columns_op.py | 6 +++- .../operations/reorder_columns_op.py | 4 +++ .../remodeling/operations/split_rows_op.py | 4 +++ .../operations/summarize_column_names_op.py | 4 +++ .../operations/summarize_column_values_op.py | 4 +++ .../operations/summarize_definitions_op.py | 4 +++ .../operations/summarize_hed_tags_op.py | 4 +++ .../operations/summarize_hed_type_op.py | 4 +++ .../operations/summarize_hed_validation_op.py | 4 +++ .../summarize_sidecar_from_events_op.py | 4 +++ hed/tools/remodeling/validator.py | 13 +++++++- tests/tools/remodeling/test_validator.py | 33 +++++++++++++++++++ 23 files changed, 160 insertions(+), 31 deletions(-) diff --git a/hed/tools/remodeling/operations/base_op.py b/hed/tools/remodeling/operations/base_op.py index 42cd887bb..ffcdc4be4 100644 --- a/hed/tools/remodeling/operations/base_op.py +++ b/hed/tools/remodeling/operations/base_op.py @@ -36,3 +36,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): """ return df.copy() + + @staticmethod + @abstractmethod + def validate_input_data(parameters): + '''Validates whether operation parameter input data meets specific criteria beyond what can be captured in json schema. + For example, whether two input arrays are the same length. Minimum implementation should return an empty list + to indicate no errors were found. If additional validation is necessary, method should perform the validation and + return a list with user friendly error strings. + ''' + return [] diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index c089420d7..9221330d5 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -1,6 +1,5 @@ """ Convert the type of the specified columns of a tabular file. """ #TODO finish implementation -#TODO Specify when requirements for decimal_places parameter from hed.tools.remodeling.operations.base_op import BaseOp @@ -14,9 +13,7 @@ class ConvertColumnsOp(BaseOp): Optional remodeling parameters: - **decimal_places** (*int*): Number decimal places to keep (for fixed only). 
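Both conditional constructs added earlier in this series, the if/then rule that makes decimal_places mandatory when convert_to is "fixed" and the dependentRequired rule that ties factor_names to factor_values, can be exercised directly with jsonschema. The fragment below mixes the two rules into one toy object purely for illustration; it is not an actual operation schema:

    # Toy fragment combining the two conditional constructs: decimal_places is
    # required only when convert_to is "fixed", and factor_names may only be
    # given together with factor_values.
    from jsonschema import Draft202012Validator

    fragment = {
        "type": "object",
        "properties": {
            "convert_to": {"type": "string"},
            "decimal_places": {"type": "integer"},
            "factor_names": {"type": "array"},
            "factor_values": {"type": "array"}
        },
        "if": {
            "properties": {"convert_to": {"const": "fixed"}},
            "required": ["convert_to"]
        },
        "then": {"required": ["decimal_places"]},
        "dependentRequired": {"factor_names": ["factor_values"]}
    }

    validator = Draft202012Validator(fragment)
    print(validator.is_valid({"convert_to": "fixed"}))                       # False
    print(validator.is_valid({"convert_to": "fixed", "decimal_places": 2}))  # True
    print(validator.is_valid({"factor_names": ["stop_trial"]}))              # False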
- - Notes: - - Decimal places requirements not implemented + """ NAME = "convert_columns" @@ -82,3 +79,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_new = df.copy() return df_new + + @staticmethod + def validate_input_data(operations): + return [] diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 207d7ad3c..314017f3f 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -58,9 +58,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises ValueError: - - If factor_names is not empty and is not the same length as factor_values. - """ super().__init__(self.PARAMS, parameters) self.column_name = parameters['column_name'] @@ -99,3 +96,13 @@ def do_op(self, dispatcher, df, name, sidecar=None): column = factor_names[index] df_new[column] = factor_index.astype(int) return df_new + + @staticmethod + def validate_input_data(parameters): + if parameters.get("factor_names", False): + if len(parameters.get("factor_names")) != len(parameters.get("factor_values")): + return ["The list in factor_names, in the factor_column operation, should have the same number of items as factor_values."] + else: + return [] + else: + return [] diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 544fb682c..75a60f3a9 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -71,9 +71,6 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises ValueError: - - If the length of query names is not empty and not same length as queries. - """ super().__init__(self.PARAMS, parameters) self.queries = parameters['queries'] @@ -118,3 +115,11 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_new = pd.concat(df_list, axis=1) df_new.replace('n/a', np.NaN, inplace=True) return df_new + + @staticmethod + def validate_input_data(parameters): + errors = [] + if parameters.get("query_names", False): + if len(parameters.get("query_names")) != len(parameters.get("queries")): + errors.append("The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") + return errors diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 097285ee0..1cd362ad4 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -84,3 +84,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): df_new = pd.concat(df_list, axis=1) df_new.replace('n/a', np.NaN, inplace=True) return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index 7a8787b9b..cbd549731 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -59,17 +59,11 @@ def __init__(self, parameters): Parameters: parameters (dict): Actual values of the parameters for the operation. - :raises ValueError: - - If one of the match column is the merge column. 
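Checks such as the equal-length rule for factor_names and factor_values cannot be expressed in the JSON schema itself, which is what these validate_input_data hooks are for. Assuming the patched package is importable, the static method can be exercised on its own (column and value names below are invented):

    # Assumes the patched hed package is importable; the parameter values are
    # invented. A mismatched factor_names length yields one error string, a
    # matching length yields an empty list.
    from hed.tools.remodeling.operations.factor_column_op import FactorColumnOp

    mismatched = {"column_name": "trial_type",
                  "factor_values": ["go", "stop"],
                  "factor_names": ["go_trial"]}
    matched = {"column_name": "trial_type",
               "factor_values": ["go", "stop"],
               "factor_names": ["go_trial", "stop_trial"]}

    print(FactorColumnOp.validate_input_data(mismatched))   # one message about lengths
    print(FactorColumnOp.validate_input_data(matched))      # []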
- """ super().__init__(self.PARAMS, parameters) self.column_name = parameters["column_name"] self.event_code = parameters["event_code"] self.match_columns = parameters["match_columns"] - if self.column_name in self.match_columns: - raise ValueError("MergeColumnCannotBeMatchColumn", - f"Column {self.column_name} cannot be one of the match columns: {str(self.match_columns)}") self.set_durations = parameters["set_durations"] self.ignore_missing = parameters["ignore_missing"] @@ -167,3 +161,11 @@ def _update_durations(df_new, remove_groups): "onset", "duration"]].sum(skipna=True).max() df_new.loc[anchor, "duration"] = max( max_group, max_anchor) - df_new.loc[anchor, "onset"] + + @staticmethod + def validate_input_data(parameters): + errors = [] + if parameters.get("match_columns", False): + if parameters.get("column_name") in parameters.get("match_columns"): + errors.append("The column_name in the merge_consecutive operation cannot be specified as a match_column.") + return errors diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 319439c88..9764c2213 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -135,3 +135,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): # df_new.loc[group, self.number_column_name] = i + 1 return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index 507f41f3c..20513f616 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -87,3 +87,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): # df_new[self.number_column_name] = df_new.index + 1 return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 58b9fb437..18e0e7733 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -85,30 +85,17 @@ def __init__(self, parameters): Parameters: parameters (dict): Parameter values for required and optional parameters. - :raises ValueError: - - If an integer column is not a source column. - - If a map_list entry has the wrong number of items (source columns + destination columns). 
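The consistency rules that previously raised ValueError in __init__ move into remap_columns' validate_input_data: every map_list row must contain len(source_columns) + len(destination_columns) entries, and any integer_sources must come from source_columns. A hedged example, again assuming the patched package is importable and using invented column names:

    # Assumes the patched hed package is importable; the ragged map_list row
    # triggers the row-length message from RemapColumnsOp.validate_input_data.
    from hed.tools.remodeling.operations.remap_columns_op import RemapColumnsOp

    params = {"source_columns": ["response"],
              "destination_columns": ["response_label"],
              "map_list": [["1", "correct"],
                           ["0", "incorrect", "oops"]],   # wrong row length
              "ignore_missing": True}

    print(RemapColumnsOp.validate_input_data(params))   # one message about row lengths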
- """ super().__init__(self.PARAMS, parameters) self.source_columns = parameters['source_columns'] self.integer_sources = [] self.string_sources = self.source_columns if "integer_sources" in parameters: - self.integer_sources = parameters['integer_sources'] - if not set(self.integer_sources).issubset(set(self.source_columns)): - raise ValueError("IntegerSourceColumnsInvalid", - f"Integer courses {str(self.integer_sources)} must be in {str(self.source_columns)}") self.string_sources = list( set(self.source_columns).difference(set(self.integer_sources))) self.destination_columns = parameters['destination_columns'] self.map_list = parameters['map_list'] self.ignore_missing = parameters['ignore_missing'] - entry_len = len(self.source_columns) + len(self.destination_columns) - for index, item in enumerate(self.map_list): - if len(item) != entry_len: - raise ValueError("BadColumnMapEntry", - f"Map list entry {index} has {len(item)} elements, but must have {entry_len} elements") self.key_map = self._make_key_map() @@ -155,3 +142,16 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise ValueError("MapSourceValueMissing", f"{name}: Ignore missing is false, but source values [{missing}] are in data but not map") return df_new + + @staticmethod + def validate_input_data(parameters): + errors = [] + if len(set([len(x) for x in parameters.get("map_list")])) != 1: + errors.append("The lists specified in the map_list parameter in the remap_columns operation should all have the same length.") + else: + if (len(parameters.get('source_columns')) + len(parameters.get("destination_columns"))) != len(parameters.get("map_list")[0]): + errors.append("The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.") + if parameters.get("integer_sources", False): + if not all([(x in parameters.get("source_columns")) for x in parameters.get("integer_sources")]): + errors.append("All integer_sources in the remap_columns operation should be source_columns.") + return errors diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index a81fbd50c..99f542266 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -72,3 +72,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise KeyError("MissingColumnCannotBeRemoved", f"{name}: Ignore missing is False but a column in {str(self.column_names)} is " f"not in the data columns [{str(df_new.columns)}]") + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index 6eb0e2daa..ca866229b 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -68,3 +68,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): for value in self.remove_values: df_new = df_new.loc[df_new[self.column_name] != value, :] return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index 9aa33eea6..a9c3e0ab5 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -7,7 +7,7 @@ class RenameColumnsOp (BaseOp): """ Rename columns in a tabular file. 
Required remodeling parameters: - - **column_mapping** (*dict*): The names of the columns to be removed. + - **column_mapping** (*dict*): The names of the columns to be renamed. - **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns and should be ignored. """ @@ -73,3 +73,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): raise KeyError("MappedColumnsMissingFromData", f"{name}: ignore_missing is False, mapping columns [{self.column_mapping}]" f" but df columns are [{str(df.columns)}") + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index 996349830..efeb2a290 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -83,3 +83,7 @@ def do_op(self, dispatcher, df, name, sidecar=None): ordered += [elem for elem in current_columns if elem not in ordered] df_new = df_new.loc[:, ordered] return df_new + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 45d86da28..7442447f6 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -182,3 +182,7 @@ def _create_onsets(df, onset_source): raise TypeError("BadOnsetInModel", f"Remodeling onset {str(onset)} must either be numeric or a column name.", "") return onsets + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 394f2129a..5ea03cff0 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -173,3 +173,7 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): for file in element.get("Files", []): sum_list.append(f"{indent}{indent}{file}") return "\n".join(sum_list) + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 2380e47ea..7073807ed 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -305,3 +305,7 @@ def sort_dict(count_dict, reverse=False): sorted_tuples = sorted( count_dict.items(), key=lambda x: x[1][0], reverse=reverse) return len(sorted_tuples), sorted_tuples + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 9f66a05a9..725c4f0a3 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -218,3 +218,7 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ return "" + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index d6e0c8040..284f19324 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -276,3 
+276,7 @@ def _get_details(key_list, template, verbose=False): for tag_cnt in template[item.lower()]: key_details.append(tag_cnt.get_info(verbose=verbose)) return key_details + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 4b0341fcf..08ff7feee 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -251,3 +251,7 @@ def _level_details(level_counts, offset="", indent=""): level_list.append( f"{offset}{indent*3}Description: {details['description']}") return level_list + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index e3fb43ea8..b6a2381c1 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -279,3 +279,7 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings): results['sidecar_issues'][sidecar.name] = str_issues results['total_sidecar_issues'] = len(sidecar_issues) return results + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 3425da569..7a30bdc85 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -217,3 +217,7 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): f"Value columns: {str(specifics.get('Value info', {}).keys())}", f"Sidecar:\n{json.dumps(specifics['Sidecar'], indent=indent)}"] return "\n".join(sum_list) + + @staticmethod + def validate_input_data(parameters): + return [] diff --git a/hed/tools/remodeling/validator.py b/hed/tools/remodeling/validator.py index 3b8427e6d..60ce68bec 100644 --- a/hed/tools/remodeling/validator.py +++ b/hed/tools/remodeling/validator.py @@ -97,7 +97,8 @@ def __init__(self): self.validator = Draft202012Validator(self.schema) def validate(self, operations): - """ Validates a dictionary against the json schema specification for the remodeler file and returns the a list of user friendly error messages + """ Validates a dictionary against the json schema specification for the remodeler file, plus any additional data validation that is + necessary and returns a list of user friendly error messages. 
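In outline, the updated validate() makes two passes: structural problems reported by the schema come first, and only when the overall shape is sound do the per-operation validate_input_data checks run, each message prefixed with the 1-based operation index. The sketch below is an approximation of that flow, not the method's actual code:

    # Rough sketch of the two-pass flow; the names and message format are only
    # approximations of the real method.
    def validate_two_pass(operations, schema_validator, op_classes):
        errors = [error.message for error in schema_validator.iter_errors(operations)]
        if errors:                      # schema problems: stop here
            return errors
        for index, operation in enumerate(operations):
            op_class = op_classes[operation["operation"]]
            for message in op_class.validate_input_data(operation["parameters"]):
                errors.append(f"Operation {index + 1}: {message}")
        return errors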
Parameters: **operations** (*dict*): Dictionary with input operations to run through the remodeler @@ -110,6 +111,16 @@ def validate(self, operations): for error in sorted(self.validator.iter_errors(operations), key=lambda e: e.path): list_of_error_strings.append( self._parse_message(error, operations)) + if list_of_error_strings: + return list_of_error_strings + + operation_by_parameters = [(operation["operation"], operation["parameters"]) for operation in operations] + + for index, operation in enumerate(operation_by_parameters): + error_strings = valid_operations[operation[0]].validate_input_data(operation[1]) + for error_string in error_strings: + list_of_error_strings.append("Operation %s: %s" %(index+1, error_string)) + return list_of_error_strings def _parse_message(self, error, operations): diff --git a/tests/tools/remodeling/test_validator.py b/tests/tools/remodeling/test_validator.py index 90dfb5898..15447edea 100644 --- a/tests/tools/remodeling/test_validator.py +++ b/tests/tools/remodeling/test_validator.py @@ -117,3 +117,36 @@ def test_validate_parameters(self): double_item_in_array_nested[0]["parameters"]["new_events"]["response"]["copy_columns"] = ['response', 'response'] error_strings = validator.validate(double_item_in_array_nested) self.assertEqual(error_strings[0], "Operation 1: The list in copy_columns, response, new_events, in the split_rows operation, should only contain unique items.") + + def test_validate_parameter_data(self): + validator = RemodelerValidator() + + factor_column_validate = [deepcopy(self.remodel_file)[1]] + factor_column_validate[0]["parameters"]["factor_names"] = ["stopped"] + error_strings = validator.validate(factor_column_validate) + self.assertEqual(error_strings[0], "Operation 1: The list in factor_names, in the factor_column operation, should have the same number of items as factor_values.") + + factor_hed_tags_validate = [deepcopy(self.remodel_file)[2]] + factor_hed_tags_validate[0]["parameters"]["query_names"] = ["correct"] + error_strings = validator.validate(factor_hed_tags_validate) + self.assertEqual(error_strings[0], "Operation 1: The list in query_names, in the factor_hed_tags operation, should have the same number of items as queries.") + + merge_consecutive_validate = [deepcopy(self.remodel_file)[4]] + merge_consecutive_validate[0]["parameters"]["match_columns"].append("trial_type") + error_strings = validator.validate(merge_consecutive_validate) + self.assertEqual(error_strings[0], "Operation 1: The column_name in the merge_consecutive operation cannot be specified as a match_column.") + + remap_columns_validate_same_length = [deepcopy(self.remodel_file)[5]] + remap_columns_validate_same_length[0]["parameters"]["map_list"][0] = [""] + error_strings = validator.validate(remap_columns_validate_same_length) + self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should all have the same length.") + + remap_columns_validate_right_length = [deepcopy(self.remodel_file[5])] + remap_columns_validate_right_length[0]["parameters"]["map_list"] = [["string1", "string2"], ["string3", "string4"]] + error_strings = validator.validate(remap_columns_validate_right_length) + self.assertEqual(error_strings[0], "Operation 1: The lists specified in the map_list parameter in the remap_columns operation should have a length equal to the number of source columns + the number of destination columns.") + + remap_columns_integer_sources = [deepcopy(self.remodel_file[5])] + 
remap_columns_integer_sources[0]["parameters"]["integer_sources"] = ["unknown_column"] + error_strings = validator.validate(remap_columns_integer_sources) + self.assertEqual(error_strings[0], "Operation 1: All integer_sources in the remap_columns operation should be source_columns.") \ No newline at end of file From d4a19e58b9048d050e8d73301a72e81363e576df Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Fri, 5 Jan 2024 15:46:56 +0100 Subject: [PATCH 19/20] correct base class instantiation and summaries --- hed/tools/remodeling/operations/convert_columns_op.py | 2 +- hed/tools/remodeling/operations/factor_column_op.py | 6 +----- hed/tools/remodeling/operations/factor_hed_tags_op.py | 2 +- hed/tools/remodeling/operations/factor_hed_type_op.py | 2 +- .../remodeling/operations/merge_consecutive_op.py | 2 +- hed/tools/remodeling/operations/number_groups_op.py | 2 +- hed/tools/remodeling/operations/number_rows_op.py | 2 +- hed/tools/remodeling/operations/remap_columns_op.py | 2 +- hed/tools/remodeling/operations/remove_columns_op.py | 2 +- hed/tools/remodeling/operations/remove_rows_op.py | 2 +- hed/tools/remodeling/operations/rename_columns_op.py | 2 +- hed/tools/remodeling/operations/reorder_columns_op.py | 2 +- hed/tools/remodeling/operations/split_rows_op.py | 2 +- .../remodeling/operations/summarize_column_names_op.py | 10 +++++----- .../operations/summarize_column_values_op.py | 10 +++++----- .../remodeling/operations/summarize_definitions_op.py | 10 +++++----- .../remodeling/operations/summarize_hed_tags_op.py | 6 +++++- .../remodeling/operations/summarize_hed_type_op.py | 6 +++++- .../operations/summarize_hed_validation_op.py | 6 +++++- .../operations/summarize_sidecar_from_events_op.py | 6 +++++- 20 files changed, 48 insertions(+), 36 deletions(-) diff --git a/hed/tools/remodeling/operations/convert_columns_op.py b/hed/tools/remodeling/operations/convert_columns_op.py index 9221330d5..3768f9feb 100644 --- a/hed/tools/remodeling/operations/convert_columns_op.py +++ b/hed/tools/remodeling/operations/convert_columns_op.py @@ -58,7 +58,7 @@ def __init__(self, parameters): parameters (dict): Parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_names = parameters['column_names'] self.convert_to = parameters['convert_to'] self.decimal_places = parameters.get('decimal_places', None) diff --git a/hed/tools/remodeling/operations/factor_column_op.py b/hed/tools/remodeling/operations/factor_column_op.py index 314017f3f..992b8e8ba 100644 --- a/hed/tools/remodeling/operations/factor_column_op.py +++ b/hed/tools/remodeling/operations/factor_column_op.py @@ -59,14 +59,10 @@ def __init__(self, parameters): parameters (dict): Parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters['column_name'] self.factor_values = parameters['factor_values'] self.factor_names = parameters['factor_names'] - if self.factor_names and len(self.factor_values) != len(self.factor_names): - raise ValueError("FactorNamesLenBad", - f"The factor_names length {len(self.factor_names)} must be empty or equal" + - f"to the factor_values length {len(self.factor_values)} .") def do_op(self, dispatcher, df, name, sidecar=None): """ Create factor columns based on values in a specified column. 
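The switch from super().__init__(self.PARAMS, parameters) to super().__init__(parameters) works because PARAMS is a class attribute, so a base initializer can reach the subclass's schema without being handed it explicitly. A toy illustration of the pattern (class names invented, not the real BaseOp):

    # Toy illustration only, with invented class names: a class attribute is
    # visible to the base initializer through the subclass itself.
    class ToyBaseOp:
        PARAMS = {}

        def __init__(self, parameters):
            self.parameters = parameters
            self.schema = type(self).PARAMS      # picks up the subclass PARAMS

    class ToyRemoveRowsOp(ToyBaseOp):
        PARAMS = {"type": "object", "required": ["column_name", "remove_values"]}

    op = ToyRemoveRowsOp({"column_name": "trial_type", "remove_values": ["n/a"]})
    print(op.schema["required"])     # ['column_name', 'remove_values']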
diff --git a/hed/tools/remodeling/operations/factor_hed_tags_op.py b/hed/tools/remodeling/operations/factor_hed_tags_op.py index 75a60f3a9..d28fa5e83 100644 --- a/hed/tools/remodeling/operations/factor_hed_tags_op.py +++ b/hed/tools/remodeling/operations/factor_hed_tags_op.py @@ -72,7 +72,7 @@ def __init__(self, parameters): parameters (dict): Actual values of the parameters for the operation. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.queries = parameters['queries'] self.query_names = parameters['query_names'] self.remove_types = parameters['remove_types'] diff --git a/hed/tools/remodeling/operations/factor_hed_type_op.py b/hed/tools/remodeling/operations/factor_hed_type_op.py index 1cd362ad4..5c7f8885e 100644 --- a/hed/tools/remodeling/operations/factor_hed_type_op.py +++ b/hed/tools/remodeling/operations/factor_hed_type_op.py @@ -50,7 +50,7 @@ def __init__(self, parameters): parameters (dict): Actual values of the parameters for the operation. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.type_tag = parameters["type_tag"] self.type_values = parameters["type_values"] diff --git a/hed/tools/remodeling/operations/merge_consecutive_op.py b/hed/tools/remodeling/operations/merge_consecutive_op.py index cbd549731..89459b302 100644 --- a/hed/tools/remodeling/operations/merge_consecutive_op.py +++ b/hed/tools/remodeling/operations/merge_consecutive_op.py @@ -60,7 +60,7 @@ def __init__(self, parameters): parameters (dict): Actual values of the parameters for the operation. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters["column_name"] self.event_code = parameters["event_code"] self.match_columns = parameters["match_columns"] diff --git a/hed/tools/remodeling/operations/number_groups_op.py b/hed/tools/remodeling/operations/number_groups_op.py index 9764c2213..1a2bd1fa3 100644 --- a/hed/tools/remodeling/operations/number_groups_op.py +++ b/hed/tools/remodeling/operations/number_groups_op.py @@ -73,7 +73,7 @@ class NumberGroupsOp(BaseOp): } def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.number_column_name = parameters['number_column_name'] self.source_column = parameters['source_column'] self.start = parameters['start'] diff --git a/hed/tools/remodeling/operations/number_rows_op.py b/hed/tools/remodeling/operations/number_rows_op.py index 20513f616..c2b38a08a 100644 --- a/hed/tools/remodeling/operations/number_rows_op.py +++ b/hed/tools/remodeling/operations/number_rows_op.py @@ -45,7 +45,7 @@ class NumberRowsOp(BaseOp): } def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.number_column_name = parameters['number_column_name'] self.overwrite = parameters.get('overwrite', False) self.match_value = parameters.get('match_value', False) diff --git a/hed/tools/remodeling/operations/remap_columns_op.py b/hed/tools/remodeling/operations/remap_columns_op.py index 18e0e7733..176218be7 100644 --- a/hed/tools/remodeling/operations/remap_columns_op.py +++ b/hed/tools/remodeling/operations/remap_columns_op.py @@ -86,7 +86,7 @@ def __init__(self, parameters): parameters (dict): Parameter values for required and optional parameters. 
""" - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.source_columns = parameters['source_columns'] self.integer_sources = [] self.string_sources = self.source_columns diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py index 99f542266..a20015d48 100644 --- a/hed/tools/remodeling/operations/remove_columns_op.py +++ b/hed/tools/remodeling/operations/remove_columns_op.py @@ -41,7 +41,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_names = parameters['column_names'] ignore_missing = parameters['ignore_missing'] if ignore_missing: diff --git a/hed/tools/remodeling/operations/remove_rows_op.py b/hed/tools/remodeling/operations/remove_rows_op.py index ca866229b..181f70d15 100644 --- a/hed/tools/remodeling/operations/remove_rows_op.py +++ b/hed/tools/remodeling/operations/remove_rows_op.py @@ -45,7 +45,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_name = parameters["column_name"] self.remove_values = parameters["remove_values"] diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py index a9c3e0ab5..160427b81 100644 --- a/hed/tools/remodeling/operations/rename_columns_op.py +++ b/hed/tools/remodeling/operations/rename_columns_op.py @@ -43,7 +43,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters """ - super().__init__("rename_columns", parameters) + super().__init__(parameters) self.column_mapping = parameters['column_mapping'] if parameters['ignore_missing']: self.error_handling = 'ignore' diff --git a/hed/tools/remodeling/operations/reorder_columns_op.py b/hed/tools/remodeling/operations/reorder_columns_op.py index efeb2a290..becf66e04 100644 --- a/hed/tools/remodeling/operations/reorder_columns_op.py +++ b/hed/tools/remodeling/operations/reorder_columns_op.py @@ -46,7 +46,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.column_order = parameters['column_order'] self.ignore_missing = parameters['ignore_missing'] self.keep_others = parameters['keep_others'] diff --git a/hed/tools/remodeling/operations/split_rows_op.py b/hed/tools/remodeling/operations/split_rows_op.py index 7442447f6..04dbb65df 100644 --- a/hed/tools/remodeling/operations/split_rows_op.py +++ b/hed/tools/remodeling/operations/split_rows_op.py @@ -85,7 +85,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. 
""" - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.anchor_column = parameters['anchor_column'] self.new_events = parameters['new_events'] self.remove_parent_row = parameters['remove_parent_row'] diff --git a/hed/tools/remodeling/operations/summarize_column_names_op.py b/hed/tools/remodeling/operations/summarize_column_names_op.py index 5ea03cff0..f267eb439 100644 --- a/hed/tools/remodeling/operations/summarize_column_names_op.py +++ b/hed/tools/remodeling/operations/summarize_column_names_op.py @@ -49,7 +49,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) @@ -78,6 +78,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): summary.update_summary( {"name": name, "column_names": list(df_new.columns)}) return df_new + + @staticmethod + def validate_input_data(parameters): + return [] class ColumnNamesSummary(BaseSummary): @@ -173,7 +177,3 @@ def _get_dataset_string(result, indent=BaseSummary.DISPLAY_INDENT): for file in element.get("Files", []): sum_list.append(f"{indent}{indent}{file}") return "\n".join(sum_list) - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_column_values_op.py b/hed/tools/remodeling/operations/summarize_column_values_op.py index 7073807ed..40518c414 100644 --- a/hed/tools/remodeling/operations/summarize_column_values_op.py +++ b/hed/tools/remodeling/operations/summarize_column_values_op.py @@ -78,7 +78,7 @@ def __init__(self, parameters): """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.skip_columns = parameters['skip_columns'] @@ -114,6 +114,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class ColumnValueSummary(BaseSummary): @@ -305,7 +309,3 @@ def sort_dict(count_dict, reverse=False): sorted_tuples = sorted( count_dict.items(), key=lambda x: x[1][0], reverse=reverse) return len(sorted_tuples), sorted_tuples - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py index 725c4f0a3..99b06582a 100644 --- a/hed/tools/remodeling/operations/summarize_definitions_op.py +++ b/hed/tools/remodeling/operations/summarize_definitions_op.py @@ -50,7 +50,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. 
""" - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) @@ -78,6 +78,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class DefinitionSummary(BaseSummary): def __init__(self, sum_op, hed_schema, known_defs=None): @@ -218,7 +222,3 @@ def _get_individual_string(result, indent=BaseSummary.DISPLAY_INDENT): """ return "" - - @staticmethod - def validate_input_data(parameters): - return [] diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py index 284f19324..7682f4e88 100644 --- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py @@ -89,7 +89,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.tags = parameters['tags'] @@ -126,6 +126,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedTagSummary(BaseSummary): diff --git a/hed/tools/remodeling/operations/summarize_hed_type_op.py b/hed/tools/remodeling/operations/summarize_hed_type_op.py index 08ff7feee..364c3d91f 100644 --- a/hed/tools/remodeling/operations/summarize_hed_type_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_type_op.py @@ -60,7 +60,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.type_tag = parameters['type_tag'].lower() @@ -91,6 +91,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedTypeSummary(BaseSummary): diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py index b6a2381c1..6d43d9cfa 100644 --- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py +++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py @@ -60,7 +60,7 @@ def __init__(self, parameters): parameters (dict): Dictionary with the parameter values for required and optional parameters. 
""" - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) @@ -91,6 +91,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): 'schema': dispatcher.hed_schema, 'sidecar': sidecar}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class HedValidationSummary(BaseSummary): diff --git a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py index 7a30bdc85..e06765325 100644 --- a/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py +++ b/hed/tools/remodeling/operations/summarize_sidecar_from_events_op.py @@ -69,7 +69,7 @@ def __init__(self, parameters): """ - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.skip_columns = parameters['skip_columns'] @@ -102,6 +102,10 @@ def do_op(self, dispatcher, df, name, sidecar=None): {'df': dispatcher.post_proc_data(df_new), 'name': name}) return df_new + @staticmethod + def validate_input_data(parameters): + return [] + class EventsToSidecarSummary(BaseSummary): From cb53d4466ca13d3b23ea740f9b1921c3055fc359 Mon Sep 17 00:00:00 2001 From: Monique Denissen Date: Fri, 5 Jan 2024 15:47:20 +0100 Subject: [PATCH 20/20] update tests --- .../remodeling/operations/test_base_op.py | 4 + .../operations/test_base_summary.py | 10 +- .../operations/test_convert_columns_op.py | 7 - .../operations/test_factor_column_op.py | 7 - .../operations/test_merge_consecutive_op.py | 12 -- .../operations/test_number_groups.py | 70 ------- .../operations/test_number_rows_op.py | 188 ------------------ .../operations/test_remap_columns_op.py | 41 ---- .../operations/test_split_rows_op.py | 3 +- .../test_summarize_column_names_op.py | 2 +- .../test_summarize_column_values_op.py | 2 +- .../test_summarize_definitions_op.py | 12 -- .../operations/test_summarize_hed_tags_op.py | 17 -- .../operations/test_summarize_hed_type_op.py | 7 +- .../test_summarize_hed_validation_op.py | 6 - 15 files changed, 19 insertions(+), 369 deletions(-) diff --git a/tests/tools/remodeling/operations/test_base_op.py b/tests/tools/remodeling/operations/test_base_op.py index 2bef3e5da..e581cbdb7 100644 --- a/tests/tools/remodeling/operations/test_base_op.py +++ b/tests/tools/remodeling/operations/test_base_op.py @@ -20,6 +20,10 @@ class TestOp(BaseOp): def do_op(self, dispatcher, df, name, sidecar=None): return df + + @staticmethod + def validate_input_data(parameters): + return [] class Test(unittest.TestCase): diff --git a/tests/tools/remodeling/operations/test_base_summary.py b/tests/tools/remodeling/operations/test_base_summary.py index 5faed6467..e45d620a8 100644 --- a/tests/tools/remodeling/operations/test_base_summary.py +++ b/tests/tools/remodeling/operations/test_base_summary.py @@ -6,6 +6,7 @@ class TestOp(BaseOp): + NAME = "test_op" PARAMS = { "operation": "test_summary_op", "required_parameters": { @@ -20,11 +21,18 @@ class TestOp(BaseOp): SUMMARY_TYPE = "test_sum" def __init__(self, parameters): - super().__init__(self.PARAMS, parameters) + super().__init__(parameters) self.summary_name = parameters['summary_name'] self.summary_filename = parameters['summary_filename'] self.append_timecode = parameters.get('append_timecode', False) + def do_op(self, 
dispatcher, df, name, sidecar=None): + return df.copy() + + @staticmethod + def validate_input_data(parameters): + return [] + class TestSummary(BaseSummary): diff --git a/tests/tools/remodeling/operations/test_convert_columns_op.py b/tests/tools/remodeling/operations/test_convert_columns_op.py index 48d177b0f..d988f616b 100644 --- a/tests/tools/remodeling/operations/test_convert_columns_op.py +++ b/tests/tools/remodeling/operations/test_convert_columns_op.py @@ -36,12 +36,5 @@ def setUp(self): def tearDownClass(cls): pass - def test_constructor_bad_convert_to(self): - self.base_parameters["convert_to"] = "blech" - with self.assertRaises(ValueError) as context: - ConvertColumnsOp(self.base_parameters) - self.assertEqual(context.exception.args[0], "CannotConvertToSpecifiedType") - - if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_factor_column_op.py b/tests/tools/remodeling/operations/test_factor_column_op.py index 402353d5e..454f91d81 100644 --- a/tests/tools/remodeling/operations/test_factor_column_op.py +++ b/tests/tools/remodeling/operations/test_factor_column_op.py @@ -40,13 +40,6 @@ def setUp(self): def tearDownClass(cls): pass - def test_bad_constructor(self): - self.base_parameters["factor_names"] = ["stopped"] - with self.assertRaises(ValueError) as context: - FactorColumnOp(self.base_parameters) - self.assertEqual(context.exception.args[0], "FactorNamesLenBad", - "factor_names and factor_values must be same length") - def test_no_names(self): self.base_parameters["factor_names"] = [] self.base_parameters["factor_values"] = [] diff --git a/tests/tools/remodeling/operations/test_merge_consecutive_op.py b/tests/tools/remodeling/operations/test_merge_consecutive_op.py index 9b504b221..5dcbf720f 100644 --- a/tests/tools/remodeling/operations/test_merge_consecutive_op.py +++ b/tests/tools/remodeling/operations/test_merge_consecutive_op.py @@ -50,18 +50,6 @@ def get_dfs(self, op): df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), 'run-01') return df, self.dispatch.post_proc_data(df_new) - def test_valid(self): - parms = json.loads(self.json_parms) - op = MergeConsecutiveOp(parms) - self.assertIsInstance(op, MergeConsecutiveOp) - - def test_invalid(self): - parms = json.loads(self.json_parms) - parms["column_name"] = "sex" - with self.assertRaises(ValueError) as context: - MergeConsecutiveOp(parms) - self.assertEqual(context.exception.args[0], "MergeColumnCannotBeMatchColumn") - def test_do_op_valid(self): # Test when no extras but ignored. 
parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_number_groups.py b/tests/tools/remodeling/operations/test_number_groups.py index 1bae16d80..fc3f056f7 100644 --- a/tests/tools/remodeling/operations/test_number_groups.py +++ b/tests/tools/remodeling/operations/test_number_groups.py @@ -203,73 +203,3 @@ def test_number_groups_new_column(self): # "split_rows should not change the input df columns") # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "split_rows should not change the input df values") - - # test expected breaks parameters - def test_missing_startstop_param(self): - # test when missing parameter - parms = json.loads(self.json_missing_startstop_parms) - - # with self.assertRaisesRegex(KeyError, "MissingRequiredParameters"): - # op = NumberGroupsOp(parms) - - def test_wrong_startstop_param(self): - # test when a start stop parameter is missing - parms = json.loads(self.json_wrong_startstop_parms) - - # with self.assertRaisesRegex(KeyError, "BadParameter"): - # op = NumberGroupsOp(parms) - - def test_wrong_startstop_type_param(self): - # Test when wrong type in start stop parameters - parms = json.loads(self.json_wrong_startstop_type_parms) - # TODO fix code and put back in - # with self.assertRaisesRegex(TypeError, "BadType"): - # op = NumberGroupsOp(parms) - - def test_wrong_value_inclusion(self): - # test when a wrong value is given for inclusion (only accept include and exclude string) - parms = json.loads(self.json_wrong_inclusion_parms) - - # with self.assertRaisesRegex(ValueError, "BadValue"): - # op = NumberGroupsOp(parms) - - # test expected breaks event file - parameters - def test_existing_column_overwrite_unspecified(self): - # Test when existing column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_false(self): - # Test when existing column name is given with overwrite specified False - parms = json.loads(self.json_overwrite_false_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_missing_source_column(self): - # Test when source column does not exist in event file - parms = json.loads(self.json_parms) - op = NumberGroupsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_missing_startstop_value(self): - # Test when one of startstop values does not exist in source column - parms = json.loads(self.json_missing_startstop_value_parms) - op = NumberGroupsOp(parms) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # - # with self.assertRaisesRegex(ValueError, "MissingValue"): - # op.do_op(self.dispatcher, df_test, self.file_name) - diff 
--git a/tests/tools/remodeling/operations/test_number_rows_op.py b/tests/tools/remodeling/operations/test_number_rows_op.py index 9c60a63aa..78fdc6bcb 100644 --- a/tests/tools/remodeling/operations/test_number_rows_op.py +++ b/tests/tools/remodeling/operations/test_number_rows_op.py @@ -166,65 +166,8 @@ def setUpClass(cls): "number_column_name": "number" } - overwrite_false_parameters = { - "number_column_name": "number", - "overwrite": False - } - - overwrite_true_parameters = { - "number_column_name": "number", - "overwrite": True - } - - filter_complete_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "40"} - } - - filter_incomplete_parameters = { - "number_column_name": "number", - "match_value": {"column": "code"} - } - - filter_invalid_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "40", "label": "code"} - } - - filter_wrong_type_parameters = { - "number_column_name": "number", - "match_value": {"column": 246, "value": []} - } - - filter_missing_column_parameters = { - "number_column_name": "number", - "match_value": {"column": "trial_type", "value": "40"} - } - - filter_missing_value_parameters = { - "number_column_name": "number", - "match_value": {"column": "code", "value": "stop_trial"} - } - - filter_overwrite_parameters = { - "number_column_name": "number", - "match_value": {"column": "number", "value": "40"}, - "overwrite": True - } - cls.json_parms = json.dumps(base_parameters) - cls.json_overwrite_false_parms = json.dumps(overwrite_false_parameters) - cls.json_overwrite_true_parms = json.dumps(overwrite_true_parameters) - - cls.json_filter_complete_parameters = json.dumps(filter_complete_parameters) - cls.json_filter_incomplete_parameters = json.dumps(filter_incomplete_parameters) - cls.json_filter_invalid_parameters = json.dumps(filter_invalid_parameters) - cls.json_filter_wrong_type_parameters = json.dumps(filter_wrong_type_parameters) - cls.json_filter_missing_column_parameters = json.dumps(filter_missing_column_parameters) - cls.json_filter_missing_value_parameters = json.dumps(filter_missing_value_parameters) - cls.json_filter_overwrite_parameters = json.dumps(filter_overwrite_parameters) - cls.dispatcher = None cls.file_name = None @@ -257,136 +200,5 @@ def test_number_rows_new_column(self): # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), # "number_rows should not change the input df values") - def test_existing_column_overwrite_false(self): - # Test when existing column name is given with overwrite specified False - parms = json.loads(self.json_overwrite_false_parms) - op = NumberRowsOp(parms) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn") as context: - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_unspecified(self): - # Test when existing column name is given with overwrite unspecified (=False) - parms = json.loads(self.json_parms) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - - # with self.assertRaisesRegex(ValueError, "ExistingNumberColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_existing_column_overwrite_true(self): - # Test when existing column name is given with overwrite True - parms = 
json.loads(self.json_overwrite_true_parms) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_check = pd.DataFrame(self.overwritten_data, columns=self.existing_sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), - # "numbered_events should have the same columns as original dataframe in case of overwrite") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(all([i + 1 == value for (i, value) in enumerate(df_new[parms['number_column_name']])]), - # "event should be numbered consecutively from 1 to length of the dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - def test_filter_complete_parameters(self): - # Test when valid complete filter/match_value parameters are given - parms = json.loads(self.json_filter_complete_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_check = pd.DataFrame(self.filter_numbered_data, columns=self.numbered_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.numbered_columns), - # "numbered_events should have expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - def test_filter_incomplete_parameters(self): - # Test when filter/match_value parameters are not complete - parms = json.loads(self.json_filter_incomplete_parameters) - - # with self.assertRaisesRegex(KeyError, "MissingRequiredParameters"): - # op = NumberRowsOp(parms) - - def test_filter_invalid_parameters(self): - # Test when invalid filter/match_value parameters are given - parms = json.loads(self.json_filter_invalid_parameters) - - # with self.assertRaisesRegex(KeyError, "BadParameter"): - # op = NumberRowsOp(parms) - - def test_filter_wrong_type_parameters(self): - # Test when invalid filter/match_value parameters are given - parms = json.loads(self.json_filter_wrong_type_parameters) - # TODO: need to fix type - # with self.assertRaisesRegex(TypeError, "BadType"): - # op = NumberRowsOp(parms) - - def test_filter_missing_column_parameters(self): - # Test when specified filter column is not in event file - parms = json.loads(self.json_filter_missing_column_parameters) - op = NumberRowsOp(parms) - # df = 
pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - - # with self.assertRaisesRegex(ValueError, "MissingMatchColumn"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_filter_missing_value_parameters(self): - # Test when specified filter value is not in event file - parms = json.loads(self.json_filter_missing_value_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns) - - # with self.assertRaisesRegex(ValueError, "MissingMatchValue"): - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - - def test_filter_overwrite(self): - # Test when specified filter value is not in event file - parms = json.loads(self.json_filter_overwrite_parameters) - op = NumberRowsOp(parms) - # df = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_test = pd.DataFrame(self.sample_data, columns=self.existing_sample_columns) - # df_check = pd.DataFrame(self.filter_overwritten_numbered_data, columns=self.existing_sample_columns) - # df_new = op.do_op(self.dispatcher, df_test, self.file_name) - # df_new = df_new.fillna('n/a') - - # self.assertTrue(list(df_new.columns) == list(self.existing_sample_columns), - # "numbered_events should have expected columns") - # self.assertTrue(len(df_new) == len(df_test), - # "numbered_events should have same length as original dataframe") - # self.assertTrue(np.array_equal(df_new.to_numpy(), df_check.to_numpy()), - # "numbered_events should not differ from check") - - # Test that df has not been changed by the op - # self.assertTrue(list(df.columns) == list(df_test.columns), - # "split_rows should not change the input df columns") - # self.assertTrue(np.array_equal(df.to_numpy(), df_test.to_numpy()), - # "split_rows should not change the input df values") - - if __name__ == '__main__': unittest.main() diff --git a/tests/tools/remodeling/operations/test_remap_columns_op.py b/tests/tools/remodeling/operations/test_remap_columns_op.py index be4637d73..cd05c7ae3 100644 --- a/tests/tools/remodeling/operations/test_remap_columns_op.py +++ b/tests/tools/remodeling/operations/test_remap_columns_op.py @@ -64,47 +64,6 @@ def get_dfs(self, op, df=None): df_new = op.do_op(self.dispatch, self.dispatch.prep_data(df), 'run-01') return df, self.dispatch.post_proc_data(df_new) - def test_valid(self): - # Test when no extras but ignored. 
- parms = json.loads(self.json_parms) - op = RemapColumnsOp(parms) - df, df_test = self.get_dfs(op) - self.assertNotIn("response_type", df.columns, "remap_columns before does not have response_type column") - self.assertIn("response_type", df_test.columns, "remap_columns after has response_type column") - - def test_invalid_params(self): - parms1 = json.loads(self.json_parms) - parms1["source_columns"] = [] - with self.assertRaises(ValueError) as context1: - RemapColumnsOp(parms1) - self.assertEqual(context1.exception.args[0], "EmptySourceColumns") - - parms2 = json.loads(self.json_parms) - parms2["destination_columns"] = [] - with self.assertRaises(ValueError) as context2: - RemapColumnsOp(parms2) - self.assertEqual(context2.exception.args[0], "EmptyDestinationColumns") - - parms3 = json.loads(self.json_parms) - parms3["map_list"][1] = ["right", "correct_right"], - with self.assertRaises(ValueError) as context3: - RemapColumnsOp(parms3) - self.assertEqual(context3.exception.args[0], "BadColumnMapEntry") - - parms4 = json.loads(self.json_parms1) - parms4["integer_sources"] = ["test", "baloney"] - with self.assertRaises(ValueError) as context4: - RemapColumnsOp(parms4) - self.assertEqual(context4.exception.args[0], "IntegerSourceColumnsInvalid") - - def test_integer_sources(self): - parms1 = json.loads(self.json_parms1) - op1 = RemapColumnsOp(parms1) - self.assertIn('test', op1.integer_sources) - parms2 = json.loads(self.json_parms2) - op2 = RemapColumnsOp(parms2) - self.assertIn('test', op2.integer_sources) - def test_valid_missing(self): # Test when no extras but ignored. parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_split_rows_op.py b/tests/tools/remodeling/operations/test_split_rows_op.py index 9710e1b61..df9e4ec3a 100644 --- a/tests/tools/remodeling/operations/test_split_rows_op.py +++ b/tests/tools/remodeling/operations/test_split_rows_op.py @@ -155,8 +155,7 @@ def test_split_rows_from_files(self): df = pd.read_csv(self.events_path, delimiter='\t', header=0, dtype=str, keep_default_na=False, na_values=None) with open(self.model1_path) as fp: operation_list = json.load(fp) - operations, errors = Dispatcher.parse_operations(operation_list) - self.assertFalse(errors, 'split_rows should not give errors if operation is correct') + operations = Dispatcher.parse_operations(operation_list) dispatch = Dispatcher(operation_list) df = dispatch.prep_data(df) df_new = operations[0].do_op(dispatch, df, "Name") diff --git a/tests/tools/remodeling/operations/test_summarize_column_names_op.py b/tests/tools/remodeling/operations/test_summarize_column_names_op.py index 2aadd8e72..c0afbf1dc 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_names_op.py +++ b/tests/tools/remodeling/operations/test_summarize_column_names_op.py @@ -56,7 +56,7 @@ def test_constructor(self): def test_summary_op(self): with open(self.model_path, 'r') as fp: parms = json.load(fp) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions='8.1.0') df = dispatch.get_data_file(self.events_path) df = dispatch.prep_data(df) diff --git a/tests/tools/remodeling/operations/test_summarize_column_values_op.py b/tests/tools/remodeling/operations/test_summarize_column_values_op.py index 5fe53c4ab..9e838d5d5 100644 --- a/tests/tools/remodeling/operations/test_summarize_column_values_op.py +++ 
b/tests/tools/remodeling/operations/test_summarize_column_values_op.py @@ -93,7 +93,7 @@ def test_summary_op(self): '../../../data/remodel_tests/aomic_sub-0013_summary_all_rmdl.json')) with open(column_summary_path, 'r') as fp: parms = json.load(fp) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(events) old_len = len(df) diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py index 3e6843b25..76a469d2e 100644 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py @@ -24,18 +24,6 @@ def setUpClass(cls): def tearDownClass(cls): pass - def test_constructor(self): - parms = json.loads(self.json_parms) - parms["expand_context"] = "" - with self.assertRaises(KeyError) as context: - SummarizeDefinitionsOp(parms) - self.assertEqual(context.exception.args[0], "BadParameter") - parms2 = json.loads(self.json_parms) - parms2["mystery"] = True - with self.assertRaises(KeyError) as context: - SummarizeDefinitionsOp(parms2) - self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py index 3e1c1d128..196a95758 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py @@ -44,23 +44,6 @@ def setUpClass(cls): def tearDownClass(cls): pass - def test_constructor(self): - parms = json.loads(self.json_parms) - sum_op1 = SummarizeHedTagsOp(parms) - self.assertIsInstance(sum_op1, SummarizeHedTagsOp, "constructor creates an object of the correct type") - - def test_constructor_bad_params(self): - parms = json.loads(self.json_parms) - parms["include_context"] = "" - with self.assertRaises(TypeError) as context: - SummarizeHedTagsOp(parms) - self.assertEqual(context.exception.args[0], "BadType") - parms2 = json.loads(self.json_parms) - parms2["mystery"] = True - with self.assertRaises(KeyError) as context: - SummarizeHedTagsOp(parms2) - self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op_no_replace_no_context_remove_on(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py index 642539967..b4cedafdc 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_type_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_type_op.py @@ -63,7 +63,7 @@ def test_summary(self): parms = json.load(fp) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(self.events) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) sum_op = parsed_commands[2] sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path) context1 = dispatch.summary_dicts['AOMIC_condition_variables'] @@ -85,7 +85,7 @@ def 
test_text_summary_with_levels(self): parms = json.load(fp) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(self.events_wh) - parsed_commands, errors = Dispatcher.parse_operations(parms) + parsed_commands = Dispatcher.parse_operations(parms) sum_op = parsed_commands[2] sum_op.do_op(dispatch, dispatch.prep_data(df), 'run-01', sidecar=self.sidecar_path_wh) context1 = dispatch.summary_dicts['AOMIC_condition_variables'] @@ -97,8 +97,7 @@ def test_text_summary(self): with open(self.summary_path, 'r') as fp: parms = json.load(fp) - parsed_commands, errors = Dispatcher.parse_operations(parms) - self.assertFalse(errors) + parsed_commands = Dispatcher.parse_operations(parms) dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) df = dispatch.get_data_file(self.events) old_len = len(df) diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py index 97b87df83..cf86665cc 100644 --- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py +++ b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py @@ -42,12 +42,6 @@ def test_constructor(self): sum_op1 = SummarizeHedValidationOp(parms) self.assertIsInstance(sum_op1, SummarizeHedValidationOp, "constructor creates an object of the correct type") - parms2 = json.loads(self.json_parms) - parms2["mystery"] = True - with self.assertRaises(KeyError) as context: - SummarizeHedValidationOp(parms2) - self.assertEqual(context.exception.args[0], "BadParameter") - def test_do_op(self): dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) parms = json.loads(self.json_parms)
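
Editor's note (not part of the patches above): the hunks in this series converge on a single operation contract -- subclasses call super().__init__(parameters) without passing self.PARAMS, expose a static validate_input_data() hook that returns a list of error messages, and leave structural checking to the JSON-schema validator, while Dispatcher.parse_operations now returns only the parsed operations rather than an (operations, errors) pair. The sketch below is a hypothetical illustration of that contract, not code from the patches: the class name TrimRowsOp, the parameter n_rows, and the exact layout of PARAMS as a JSON-schema fragment are assumptions inferred from the pattern visible in the diffs.

    # Hypothetical sketch of a remodeling operation under the new contract.
    from hed.tools.remodeling.operations.base_op import BaseOp


    class TrimRowsOp(BaseOp):
        """Hypothetical operation keeping only the first n_rows rows of an events file."""

        NAME = "trim_rows"                      # operation name, as in the updated test classes
        PARAMS = {                              # assumed JSON-schema-style parameter spec
            "type": "object",
            "properties": {
                "n_rows": {"type": "integer"}
            },
            "required": ["n_rows"],
            "additionalProperties": False
        }

        def __init__(self, parameters):
            # Subclasses now pass only the parameter dict; no self.PARAMS argument.
            super().__init__(parameters)
            self.n_rows = parameters["n_rows"]

        def do_op(self, dispatcher, df, name, sidecar=None):
            # Return a new DataFrame rather than modifying the input in place.
            return df.head(self.n_rows).copy()

        @staticmethod
        def validate_input_data(parameters):
            # Report only checks the schema cannot express; an empty list means no errors.
            if parameters["n_rows"] <= 0:
                return ["n_rows must be a positive integer"]
            return []

Under this scheme a remodel file would be checked before dispatch, and parsing mirrors what the updated tests call:

    from hed.tools.remodeling.dispatcher import Dispatcher

    # parse_operations now returns only the instantiated operations;
    # the former (operations, errors) tuple is gone.
    operations = Dispatcher.parse_operations(operation_list)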