Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synching up branches #1021

Merged
merged 4 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def val_error_invalid_char(source_string, char_index):


@hed_tag_error(ValidationErrors.ELEMENT_DEPRECATED, default_severity=ErrorSeverity.WARNING)
def val_error_element_deprecatedr(tag):
def val_error_element_deprecated(tag):
return f"Element '{tag}' has been deprecated and an alternative method of tagging should be used"


Expand Down
16 changes: 8 additions & 8 deletions hed/errors/schema_error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,22 @@ def schema_error_SCHEMA_INVALID_CHILD(tag, child_tag_list):

@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' " + \
"was not defined in the schema, or was used outside of it's defined class."
return (f"Attribute '{attribute_name}' used by '{source_tag}' " +
"was not defined in the schema, or was used outside of it's defined class.")


@hed_error(SchemaWarnings.SCHEMA_PRERELEASE_VERSION_USED, default_severity=ErrorSeverity.WARNING)
def schema_error_SCHEMA_PRERELEASE_VERSION_USED(current_version, known_versions):
return f"Schema version {current_version} used, which is prerelease or unofficial. " + \
f"Known versions are: {', '.join(known_versions)}"
return (f"Schema version {current_version} used, which is prerelease or unofficial. " +
f"Known versions are: {', '.join(known_versions)}")


@hed_error(SchemaWarnings.SCHEMA_PROLOGUE_CHARACTER_INVALID, default_severity=ErrorSeverity.WARNING,
actual_code=SchemaWarnings.SCHEMA_CHARACTER_INVALID)
def schema_error_invalid_character_prologue(char_index, source_string, section_name):
invalid_char = source_string[char_index]
return f"'{section_name}' has invalid character '{invalid_char}' at " + \
f"position {char_index} of string: {source_string}"
return (f"'{section_name}' has invalid character '{invalid_char}' at " +
f"position {char_index} of string: {source_string}")


@hed_error(SchemaWarnings.SCHEMA_INVALID_CHARACTERS_IN_DESC, default_severity=ErrorSeverity.WARNING,
Expand Down Expand Up @@ -91,8 +91,8 @@ def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child
@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED,
actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATION_ERROR)
def schema_error_SCHEMA_ATTRIBUTE_VALUE_DEPRECATED(tag, deprecated_suggestion, attribute_name):
return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " + \
f"and an alternative method of tagging should be used.")
return (f"Tag '{tag}' {attribute_name} uses '{deprecated_suggestion}' which has been deprecated " +
"and an alternative method of tagging should be used.")


@hed_error(SchemaAttributeErrors.SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID,
Expand Down
2 changes: 1 addition & 1 deletion hed/schema/schema_io/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,4 @@ def get_attributes_from_row(row):
if constants.subclass_of in row.index and row[constants.subclass_of] == "HedHeader":
header_attributes, _ = _parse_header_attributes_line(attr_string)
return header_attributes
return parse_attribute_string(attr_string)
return parse_attribute_string(attr_string)
2 changes: 1 addition & 1 deletion hed/schema/schema_io/ontology_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _verify_hedid_matches(section, df, unused_tag_ids):
if id_int not in unused_tag_ids:
hedid_errors += schema_util.format_error(
row_number, row, f"'{label}' has id {id_int} which is outside " +
f"of the valid range for this type. Valid range is: " +
"of the valid range for this type. Valid range is: " +
f"{min(unused_tag_ids)} to {max(unused_tag_ids)}")
continue
except ValueError:
Expand Down
2 changes: 1 addition & 1 deletion hed/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
from .util.hed_logger import HedLogger
from .util.data_util import get_new_dataframe, get_value_dict, replace_values, reorder_columns
from .util.io_util import check_filename, clean_filename, extract_suffix_path, get_file_list, make_path
from .util.io_util import get_dir_dictionary, get_file_list, get_path_components, parse_bids_filename
from .util.io_util import get_dir_dictionary, get_path_components, parse_bids_filename

from .analysis.annotation_util import \
check_df_columns, extract_tags, generate_sidecar_entry, get_bids_dataset, hed_to_df, df_to_hed, merge_hed_dict, \
Expand Down
4 changes: 2 additions & 2 deletions hed/tools/analysis/annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def generate_sidecar_entry(column_name, column_values=None):
name_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_name)
sidecar_entry = {"Description": f"Description for {column_name}", "HED": ""}
if not column_values:
sidecar_entry["HED"] = f"(Label/{name_label}, Label/#)"
sidecar_entry["HED"] = f"(Label/{name_label}, ID/#)"
else:
levels = {}
hed = {}
Expand All @@ -112,7 +112,7 @@ def generate_sidecar_entry(column_name, column_values=None):
continue
value_label = re.sub(r'[^A-Za-z0-9-]+', '_', column_value)
levels[column_value] = f"Here describe column value {column_value} of column {column_name}"
hed[column_value] = f"(Label/{name_label}, Label/{value_label})"
hed[column_value] = f"(Label/{name_label}, ID/{value_label})"
sidecar_entry["Levels"] = levels
sidecar_entry["HED"] = hed
return sidecar_entry
Expand Down
10 changes: 5 additions & 5 deletions hed/tools/remodeling/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" Remodeling tools for revising and summarizing tabular files."""

from .backup_manager import BackupManager
from .dispatcher import Dispatcher
from .remodeler_validator import RemodelerValidator
""" Remodeling tools for revising and summarizing tabular files."""
from .backup_manager import BackupManager
from .dispatcher import Dispatcher
from .remodeler_validator import RemodelerValidator
2 changes: 1 addition & 1 deletion hed/tools/remodeling/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
""" Command-line interface for remodeling tools. """
""" Command-line interface for remodeling tools. """
2 changes: 2 additions & 0 deletions hed/tools/util/data_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ def make_info_dataframe(col_info, selected_col):
df = pd.DataFrame(sorted(list(col_values)), columns=[selected_col])
return df


def replace_na(df):
""" Replace (in place) the n/a with np.nan taking care of categorical columns. """
for column in df.columns:
Expand All @@ -221,6 +222,7 @@ def replace_na(df):
df[column] = df[column].replace('n/a', np.nan)
df[column] = pd.Categorical(df[column])


def replace_values(df, values=None, replace_value='n/a', column_list=None):
""" Replace string values in specified columns.

Expand Down
7 changes: 3 additions & 4 deletions hed/tools/visualization/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, word_cloud_to_svg

""" Visualization tools for HED. """

from .tag_word_cloud import create_wordcloud, word_cloud_to_svg
2 changes: 1 addition & 1 deletion hed/validator/util/class_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _check_value_class(self, original_tag, stripped_value, report_as, error_code
char_errors = {}
for class_name in classes:
char_errors[class_name] = self._get_problem_indices(stripped_value, class_name, start_index=start_index)
if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class
if class_valid[class_name] and not char_errors[class_name]: # We have found a valid class
return []
index_adj = len(report_as.org_base_tag) - len(original_tag.org_base_tag)
validation_issues = self.report_value_errors(char_errors, class_valid, report_as, index_adj)
Expand Down
6 changes: 3 additions & 3 deletions spec_tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def _run_single_events_test(self, info, schema, def_dict, error_code, all_codes,
for row in test:
if not isinstance(row, list):
print(f"Improper grouping in test: {error_code}:{name}")
print(f"This is probably a missing set of square brackets.")
print("This is probably a missing set of square brackets.")
break
string += "\t".join(str(x) for x in row) + "\n"

Expand All @@ -169,7 +169,7 @@ def _run_single_combo_test(self, info, schema, def_dict, error_code, all_codes,
if not isinstance(row, list):
print(f"Improper grouping in test: {error_code}:{name}")
print(f"Improper data for test {name}: {test}")
print(f"This is probably a missing set of square brackets.")
print("This is probably a missing set of square brackets.")
break
string += "\t".join(str(x) for x in row) + "\n"

Expand Down Expand Up @@ -209,7 +209,7 @@ def test_errors(self):
for test_file in self.test_files:
self.run_single_test(test_file)
# test_file = './temp.json'
self.run_single_test(test_file)
# self.run_single_test(test_file)
print(f"{len(self.fail_count)} tests got an unexpected result")
print("\n".join(self.fail_count))
self.assertEqual(len(self.fail_count), 0)
Expand Down
2 changes: 1 addition & 1 deletion tests/errors/test_error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def test_replace_tag_references(self):
self.assertEqual(nested_list, ['Hed1', {'a': 2, 'b': [3, {'c': 'Hed2'}]}])

# Test with mixed data types and HedString in a list within a dict
mixed = {'a': HedString('Hed1', self._schema),
mixed = {'a': HedString('Hed1', self._schema),
'b': [2, 3, {'c': HedString('Hed2', self._schema)}, 4]}
replace_tag_references(mixed)
self.assertEqual(mixed, {'a': 'Hed1', 'b': [2, 3, {'c': 'Hed2'}, 4]})
Expand Down
2 changes: 1 addition & 1 deletion tests/models/test_hed_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ class TestHedStrings(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.schema = load_schema_version("8.3.0")

def validator_scalar(self, test_strings, expected_results, test_function):
for test_key in test_strings:
test_result = test_function(test_strings[test_key])
Expand Down
25 changes: 2 additions & 23 deletions tests/tools/analysis/test_annotation_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,12 @@ def test_generate_sidecar_entry_non_letters(self):
self.assertIn('HED', entry1,
"generate_sidecar_entry has a HED key when column values and special chars")
hed_entry1 = entry1['HED']
self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, Label/apple_1)',
self.assertEqual(hed_entry1['apple 1'], '(Label/my_-123_10, ID/apple_1)',
"generate_sidecar_entry HED entry should convert labels correctly when column values")
entry2 = annotation_util.generate_sidecar_entry('my !#$-123_10')
self.assertIsInstance(entry2, dict,
"generate_sidecar_entry is a dictionary when no column values and special chars.")
self.assertEqual(entry2['HED'], '(Label/my_-123_10, Label/#)',
self.assertEqual(entry2['HED'], '(Label/my_-123_10, ID/#)',
"generate_sidecar_entry HED entry has correct label when no column values and special chars.")

def test_hed_to_df(self):
Expand Down Expand Up @@ -295,27 +295,6 @@ def test_merge_hed_dict_full(self):
annotation_util.merge_hed_dict(example_sidecar, spreadsheet_sidecar)
self.assertEqual(6, len(example_sidecar), 'merge_hed_dict merges with the correct length')

def test_to_factor(self):
series1 = Series([1.0, 2.0, 3.0, 4.0])
factor1 = annotation_util.to_factor(series1)
self.assertEqual(len(series1), len(factor1))
self.assertEqual(sum(factor1), len(factor1))
series2 = Series(['a', '', None, np.nan, 'n/a'])
factor2 = annotation_util.to_factor(series2)
self.assertEqual(len(series2), len(factor2))
self.assertEqual(sum(factor2), 1)
data = {
'Name': ['Alice', '', 'n/a', 1.0], # Contains a space
'Age': [25, np.nan, 35, 0]
}
df = DataFrame(data)
factor3 = annotation_util.to_factor(df, column='Name')
self.assertEqual(sum(factor3), 2)
factor4 = annotation_util.to_factor(df)
self.assertEqual(sum(factor4), 2)
with self.assertRaises(HedFileError):
annotation_util.to_factor(data)

def test_series_to_factor(self):
series1 = Series([1.0, 2.0, 3.0, 4.0])
factor1 = annotation_util.series_to_factor(series1)
Expand Down
2 changes: 1 addition & 1 deletion tests/tools/remodeling/operations/test_base_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def __init__(self, parameters):

def do_op(self, dispatcher, df, name, sidecar=None):
return df.copy()

@staticmethod
def validate_input_data(parameters):
return []
Expand Down
5 changes: 3 additions & 2 deletions tests/tools/remodeling/operations/test_number_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@ def tearDownClass(cls):

# test working
def test_number_groups_new_column(self):
pass
# Test when new column name is given with overwrite unspecified (=False)
parms = json.loads(self.json_parms)
op = NumberGroupsOp(parms)
# parms = json.loads(self.json_parms)
# op = NumberGroupsOp(parms)
# df = pd.DataFrame(self.sample_data, columns=self.sample_columns)
# df_check = pd.DataFrame(self.numbered_data, columns=self.numbered_columns)
# df_test = pd.DataFrame(self.sample_data, columns=self.sample_columns)
Expand Down
2 changes: 1 addition & 1 deletion tests/validator/test_def_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def test_expand_def_tags_placeholder(self):
expand_defs=False, shrink_defs=False,
remove_definitions=False, basic_definition_string=self.placeholder_definition_string)

self.base_def_validator(basic_def_strings, basic_def_strings,
self.base_def_validator(basic_def_strings, basic_def_strings,
expand_defs=False, shrink_defs=True,
remove_definitions=False, basic_definition_string=self.placeholder_definition_string)

Expand Down
6 changes: 3 additions & 3 deletions tests/validator/test_tag_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,8 +503,8 @@ def test_topLevelTagGroup_validation(self):
'valid1': [],
'valid2': [],
'invalid2': self.format_error(
ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=
ValidationErrors.DEFINITION_INVALID) + self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1),
ValidationErrors.HED_TOP_LEVEL_TAG, tag=1, actual_error=ValidationErrors.DEFINITION_INVALID) + \
self.format_error(ValidationErrors.HED_TOP_LEVEL_TAG, tag=1),
'invalidTwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0,
multiple_tags="Definition/InvalidDef3".split(", ")),
'invalid2TwoInOne': self.format_error(ValidationErrors.HED_MULTIPLE_TOP_TAGS, tag=0,
Expand Down Expand Up @@ -1045,7 +1045,7 @@ def test_special_units(self):
expected_issues = {
'ascii': [],
'illegalTab': self.format_error(ValidationErrors.INVALID_VALUE_CLASS_CHARACTER, tag=0,
index_in_tag=13, index_in_tag_end=14, value_class="textClass"),
index_in_tag=13, index_in_tag_end=14, value_class="textClass"),
'allowTab': []
}
self.validator_semantic(test_strings, expected_results, expected_issues, True)
Expand Down