Skip to content

Commit

Permalink
Merge pull request #828 from monique2208/dev-schema
Browse files Browse the repository at this point in the history
Validate remodeler file using json schema
  • Loading branch information
VisLab authored Jan 9, 2024
2 parents e2c7aef + e9aa9f7 commit 6b6ed14
Show file tree
Hide file tree
Showing 44 changed files with 1,712 additions and 1,088 deletions.
6 changes: 4 additions & 2 deletions hed/tools/remodeling/cli/run_remodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from hed.errors.exceptions import HedFileError
from hed.tools.util.io_util import get_file_list, get_task_from_file, get_task_dict
from hed.tools.bids.bids_dataset import BidsDataset
from hed.tools.remodeling.validator import RemodelerValidator
from hed.tools.remodeling.dispatcher import Dispatcher
from hed.tools.remodeling.backup_manager import BackupManager

Expand Down Expand Up @@ -109,10 +110,11 @@ def parse_arguments(arg_list=None):
print(f"Data directory: {args.data_dir}\nModel path: {args.model_path}")
with open(args.model_path, 'r') as fp:
operations = json.load(fp)
parsed_operations, errors = Dispatcher.parse_operations(operations)
validator = RemodelerValidator()
errors = validator.validate(operations)
if errors:
raise ValueError("UnableToFullyParseOperations",
f"Fatal operation error, cannot continue:\n{Dispatcher.errors_to_str(errors)}")
f"Fatal operation error, cannot continue:\n{errors}")
return args, operations


Expand Down
34 changes: 5 additions & 29 deletions hed/tools/remodeling/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, operation_list, data_root=None,
""" Constructor for the dispatcher.
Parameters:
operation_list (list): List of unparsed operations.
operation_list (list): List of valid unparsed operations.
data_root (str or None): Root directory for the dataset. If None, then backups are not made.
hed_versions (str, list, HedSchema, or HedSchemaGroup): The HED schema.
Expand All @@ -42,11 +42,7 @@ def __init__(self, operation_list, data_root=None,
raise HedFileError("BackupDoesNotExist",
f"Remodeler cannot be run with a dataset without first creating the "
f"{self.backup_name} backup for {self.data_root}", "")
op_list, errors = self.parse_operations(operation_list)
if errors:
these_errors = self.errors_to_str(errors, 'Dispatcher failed due to invalid operations')
raise ValueError("InvalidOperationList", f"{these_errors}")
self.parsed_ops = op_list
self.parsed_ops = self.parse_operations(operation_list)
self.hed_schema = self.get_schema(hed_versions)
self.summary_dicts = {}

Expand Down Expand Up @@ -183,31 +179,11 @@ def save_summaries(self, save_formats=['.json', '.txt'], individual_summaries="s

@staticmethod
def parse_operations(operation_list):
errors = []
operations = []
for index, item in enumerate(operation_list):
try:
if not isinstance(item, dict):
raise TypeError("InvalidOperationFormat",
f"Each operations must be a dictionary but operation {str(item)} is {type(item)}")
if "operation" not in item:
raise KeyError("MissingOperation",
f"operation {str(item)} does not have a operation key")
if "parameters" not in item:
raise KeyError("MissingParameters",
f"Operation {str(item)} does not have a parameters key")
if item["operation"] not in valid_operations:
raise KeyError("OperationNotListedAsValid",
f"Operation {item['operation']} must be added to operations_list "
f"before it can be executed.")
new_operation = valid_operations[item["operation"]](item["parameters"])
operations.append(new_operation)
except Exception as ex:
errors.append({"index": index, "item": f"{item}", "error_type": type(ex),
"error_code": ex.args[0], "error_msg": ex.args[1]})
if errors:
return [], errors
return operations, []
new_operation = valid_operations[item["operation"]](item["parameters"])
operations.append(new_operation)
return operations

@staticmethod
def prep_data(df):
Expand Down
102 changes: 26 additions & 76 deletions hed/tools/remodeling/operations/base_op.py
Original file line number Diff line number Diff line change
@@ -1,71 +1,29 @@
""" Base class for remodeling operations. """

from abc import ABC, abstractmethod

class BaseOp:
""" Base class for operations. All remodeling operations should extend this class.
The base class holds the parameters and does basic parameter checking against the operation's specification.
"""

def __init__(self, op_spec, parameters):
""" Base class constructor for operations.
class BaseOp(ABC):
""" Base class for operations. All remodeling operations should extend this class."""

def __init__(self, parameters):
""" Constructor for the BaseOp class. Should be extended by operations.
Parameters:
op_spec (dict): Specification for required and optional parameters.
parameters (dict): Actual values of the parameters for the operation.
:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.
:raises TypeError:
- If a parameter has the wrong type.
:raises ValueError:
- If the specification is missing a valid operation.
parameters (dict): A dictionary specifying the appropriate parameters for the operation.
"""
self.operation = op_spec.get("operation", "")
if not self.operation:
raise ValueError("OpMustHaveOperation", "Op must have operation is empty")
self.required_params = op_spec.get("required_parameters", {})
self.optional_params = op_spec.get("optional_parameters", {})
self.check_parameters(parameters)

def check_parameters(self, parameters):
""" Verify that the parameters meet the operation specification.
Parameters:
parameters (dict): Dictionary of parameters for this operation.
self.parameters = parameters

:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.
@property
@abstractmethod
def NAME(self):
pass

:raises TypeError:
- If a parameter has the wrong type.
"""

required = set(self.required_params.keys())
required_missing = required.difference(set(parameters.keys()))
if required_missing:
raise KeyError("MissingRequiredParameters",
f"{self.operation} requires parameters {list(required_missing)}")
for param_name, param_value in parameters.items():
if param_name in self.required_params:
param_type = self.required_params[param_name]
elif param_name in self.optional_params:
param_type = self.optional_params[param_name]
else:
raise KeyError("BadParameter",
f"{param_name} not a required or optional parameter for {self.operation}")
if isinstance(param_type, list):
self._check_list_type(param_value, param_type)
elif not isinstance(param_value, param_type):
raise TypeError("BadType", f"{param_value} has type {type(param_value)} not {param_type}")
@property
@abstractmethod
def PARAMS(self):
pass

@abstractmethod
def do_op(self, dispatcher, df, name, sidecar=None):
""" Base class method to be overridden by each operation.
Expand All @@ -78,21 +36,13 @@ def do_op(self, dispatcher, df, name, sidecar=None):
"""

return df.copy()

@staticmethod
def _check_list_type(param_value, param_type):
""" Check a parameter value against its specified type.
Parameters:
param_value (any): The value to be checked.
param_type (any): Class to check the param_value against.
:raises TypeError:
- If param_value is not an instance of param_type.
"""

for this_type in param_type:
if isinstance(param_value, this_type):
return
raise TypeError("BadType", f"{param_value} has type {type(param_value)} which is not in {str(param_type)}")
@abstractmethod
def validate_input_data(parameters):
'''Validate that the operation's parameter input data meets specific criteria beyond what can be captured in a JSON schema.
For example, whether two input arrays are the same length. A minimal implementation should return an empty list
to indicate that no errors were found. If additional validation is necessary, the method should perform the validation and
return a list of user-friendly error strings.
'''
return []
68 changes: 42 additions & 26 deletions hed/tools/remodeling/operations/convert_columns_op.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,53 @@
""" Convert the type of the specified columns of a tabular file. """
#TODO finish implementation

from hed.tools.remodeling.operations.base_op import BaseOp


class ConvertColumnsOp(BaseOp):
""" Convert.
""" Convert data type in column
Required remodeling parameters:
- **column_names** (*list*): The list of columns to convert.
- **convert_to_** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
- **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
Optional remodeling parameters:
- **decimal_places** (*int*): Number of decimal places to keep (for fixed only).
"""

NAME = "convert_columns"

PARAMS = {
"operation": "convert_columns",
"required_parameters": {
"column_names": list,
"convert_to": str
"type": "object",
"properties": {
"column_names": {
"type": "array",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": True
},
"convert_to": {
"type": "string",
"enum": ['str', 'int', 'float', 'fixed'],
},
"decimal_places": {
"type": "integer"
}
},
"required": [
"column_names",
"convert_to"
],
"additionalProperties": False,
"if": {
"properties": {
"convert_to": {"const": "fixed"}
}
},
"optional_parameters": {
"decimal_places": int
"then": {
"required": ["decimal_places"]
}
}

Expand All @@ -31,25 +57,11 @@ def __init__(self, parameters):
Parameters:
parameters (dict): Parameter values for required and optional parameters.
:raises KeyError:
- If a required parameter is missing.
- If an unexpected parameter is provided.
:raises TypeError:
- If a parameter has the wrong type.
:raises ValueError:
- If convert_to is not one of the allowed values.
"""
super().__init__(self.PARAMS, parameters)
super().__init__(parameters)
self.column_names = parameters['column_names']
self.convert_to = parameters['convert_to']
self.decimal_places = parameters.get('decimal_places', None)
self.allowed_types = ['str', 'int', 'float', 'fixed']
if self.convert_to not in self.allowed_types:
raise ValueError("CannotConvertToSpecifiedType",
f"The convert_to value {self.convert_to} must be one of {str(self.allowed_types)}")

def do_op(self, dispatcher, df, name, sidecar=None):
""" Convert the specified column to a specified type.
Expand All @@ -67,3 +79,7 @@ def do_op(self, dispatcher, df, name, sidecar=None):

df_new = df.copy()
return df_new

@staticmethod
def validate_input_data(operations):
return []
Loading

0 comments on commit 6b6ed14

Please sign in to comment.