From c5f03860d49753f0fb394cf141e64bce5f789726 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 21 Sep 2023 12:39:20 -0500
Subject: [PATCH 01/21] Add a string based search first pass. Update query
 parser from [[]] to {} notation and clean up. Minor bug fixes in query
 parser.

---
 hed/models/base_input.py               |   2 +-
 hed/models/basic_search.py             | 237 +++++++++++++++++++
 hed/models/df_util.py                  |  26 ++
 hed/models/expression_parser.py        | 159 +++++++------
 tests/models/test_base_input.py        |  27 ++-
 tests/models/test_basic_search.py      | 313 +++++++++++++++++++++++++
 tests/models/test_expression_parser.py | 118 +++++++---
 tests/schema/test_hed_schema_io.py     |   4 +
 8 files changed, 756 insertions(+), 130 deletions(-)
 create mode 100644 hed/models/basic_search.py
 create mode 100644 tests/models/test_basic_search.py

diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 12e2d889..9f437102 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -137,7 +137,7 @@ def _indexed_dict_from_onsets(onsets):
 
     @staticmethod
     def _filter_by_index_list(original_series, indexed_dict):
-        new_series = ["n/a"] * len(original_series)  # Initialize new_series with "n/a"
+        new_series = pd.Series(["n/a"] * len(original_series))
 
         for onset, indices in indexed_dict.items():
             if indices:
diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py
new file mode 100644
index 00000000..ae47b71e
--- /dev/null
+++ b/hed/models/basic_search.py
@@ -0,0 +1,237 @@
+import re
+from itertools import combinations, product
+from collections import defaultdict
+import pandas as pd
+
+
+def find_matching(series, search_string, regex=False):
+    """ Finds lines in the series that match the search string and returns a mask.
+
+    Syntax Rules:
+        - '@': Prefixing a term in the search string means the term must appear anywhere within a line.
+        - Parentheses: Elements within parentheses must appear in the line with the same level of nesting.
+                e.g.: The search string "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since A and B
+                    do not start in the same group.
+        - "LongFormTag*": A * will match any remaining word(anything but a comma or parenthesis)
+        - An individual term can be arbitrary regex, but it is limited to single continuous words.
+
+    Notes:
+        - The format of the series should match the format of the search string, whether it's in short or long form.
+        - To enable support for matching parent tags, ensure that both the series and search string are in long form.
+
+    Args:
+        series (pd.Series): A Pandas Series object containing the lines to be searched.
+        search_string (str): The string to search for in each line of the series.
+        regex (bool): If False (the default), translate any * wildcard characters to the .*? regex.
+                      If True, do no translation and pass the words through as is. Due to how the parser is
+                      set up, regex terms must not include the following characters: (),
+
+    Returns:
+        mask (pd.Series): A Boolean mask Series of the same length as the input series.
+                          The mask has `True` for lines that match the search string and `False` otherwise.
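+
+    Examples:
+        An illustrative sketch of the syntax rules above:
+
+        >>> import pandas as pd
+        >>> series = pd.Series(["(A), (B, C)", "(A, B)"])
+        >>> find_matching(series, "(A), (B)").tolist()
+        [True, False]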
+    """
+    if not regex:
+        # Replace *'s with a reasonable value for people who don't know regex
+        search_string = re.sub(r'(?<!\.)\*', '.*?', search_string)
+    anywhere_words, specific_words = find_words(search_string)
+    delimiter_map = construct_delimiter_map(search_string, specific_words)
+    source_words = anywhere_words + specific_words
+
+    # Start with every row as a candidate, then narrow the set down by each individual word
+    candidate_indexes = set(series.index)
+
+    # Loop through source_words to filter down candidate_indexes
+    for word in source_words:
+        matches = series.str.contains(word, regex=True)
+        current_word_indexes = set(matches[matches].index.tolist())
+
+        # Update candidate_indexes by taking the intersection with current_word_indexes
+        candidate_indexes &= current_word_indexes
+
+        if not candidate_indexes:
+            break
+
+    candidate_indexes = sorted(candidate_indexes)
+
+    full_mask = pd.Series(False, index=series.index)
+
+    candidate_series = series[candidate_indexes]
+
+    mask = candidate_series.apply(verify_search_delimiters, args=(anywhere_words, specific_words, delimiter_map))
+    full_mask.loc[candidate_indexes] = mask
+
+    return full_mask
+
+
+def find_words(search_string):
+    """ Extract all words in the search string.  Dividing them into words that must be relative to each other,
+         and words that can be anywhere.
+
+    Args:
+        search_string (str): The search query string to parse.
+                             Words prefixed with '@' are 'anywhere' words.
+
+    Returns:
+        tuple: A tuple containing two lists:
+            - anywhere_words (list of str): Words that can appear anywhere in the text.
+            - specific_words (list of str): Words that must appear relative to other terms.
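+
+    Examples:
+        Mirroring the accompanying tests:
+
+        >>> find_words("@global (local1, local2)")
+        (['global'], ['local1', 'local2'])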
+    """
+    # Match sequences of characters that are not commas or parentheses; whitespace is stripped below.
+    pattern = r'[^,()]+'
+    words = re.findall(pattern, search_string)
+
+    # Remove any extraneous whitespace from each word
+    words = [word.strip() for word in words if word.strip()]
+
+    anywhere_words = [word[1:] for word in words if word.startswith("@")]
+    specific_words = [word for word in words if not word.startswith("@")]
+
+    return anywhere_words, specific_words
+
+
+def check_parentheses(text):
+    """ Checks for balanced parentheses in the given text and returns the unbalanced ones.
+
+    Args:
+        text (str): The text to be checked for balanced parentheses.
+
+    Returns:
+        str: A string containing the unbalanced parentheses in their original order.
+
+    Notes:
+        - The function only considers the characters '(' and ')' for balancing.
+        - Balanced pairs of parentheses are removed, leaving behind only the unbalanced ones.
+
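+    Examples:
+        Drawn from the accompanying tests:
+
+        >>> check_parentheses("(()")
+        '('
+        >>> check_parentheses("(someText))((someText)")
+        ')('
+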
+    """
+    # Extract all parentheses from the text
+    all_parentheses = ''.join(re.findall('[()]', text))
+
+    stack = []
+    remaining_parentheses = []
+
+    # Loop through all parentheses and find balanced ones
+    for p in all_parentheses:
+        if p == '(':
+            stack.append(p)
+        elif p == ')' and stack:
+            stack.pop()
+        else:
+            remaining_parentheses.append(p)
+
+    # Add any unmatched ( left on the stack back to the remaining parentheses
+    remaining_parentheses.extend(stack)
+
+    return ''.join(remaining_parentheses)
+
+
+def reverse_and_flip_parentheses(s):
+    """ Reverses a string and flips the parentheses.
+
+        Args:
+            s (str): The string to be reversed and have its parentheses flipped.
+
+        Returns:
+            str: The reversed string with flipped parentheses.
+
+        Notes:
+            - The function takes into account only the '(' and ')' characters for flipping.
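+
+        Examples:
+            Drawn from the accompanying tests:
+
+            >>> reverse_and_flip_parentheses("a(bc)d")
+            'd(cb)a'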
+    """
+    # Reverse the string
+    reversed_s = s[::-1]
+
+    # Flip the parentheses directly in the reversed string
+    flipped_s = reversed_s.translate(str.maketrans("()", ")("))
+    return flipped_s
+
+
+def construct_delimiter_map(text, words):
+    """ Takes an input search query and list of words, returning the parenthetical delimiters between them.
+
+    Args: delimiter
+        text (str): The search query
+        words(list): A list of words we want to map between from the query
+
+    Returns:
+        dict: The two-way delimiter map
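+
+    Examples:
+        Mirroring the accompanying tests:
+
+        >>> construct_delimiter_map("word1,word2", ["word1", "word2"])
+        {('word1', 'word2'): '', ('word2', 'word1'): ''}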
+    """
+    locations = {}
+    # Find the locations of each word in the text
+    for word in words:
+        for match in re.finditer(r'(?:[ ,()]|^)(' + word + r')(?:[ ,()]|$)', text):
+            start_index = match.start(1)
+            end_index = match.end(1)
+            match_length = end_index - start_index
+            locations[start_index] = (word, match_length)
+
+    sorted_locations = sorted(locations.items())
+
+    delimiter_map = {}
+    # Use combinations to get every combination of two words in order
+    for (start1, (word1, length1)), (start2, (word2, length2)) in combinations(sorted_locations, 2):
+        end1 = start1 + length1
+        delimiter_text = text[end1:start2]
+        delimiter_map[(word1, word2)] = check_parentheses(delimiter_text)
+
+    # Add the reversed version of the above
+    reverse_map = {(word2, word1): reverse_and_flip_parentheses(delimiter_text) for ((word1, word2), delimiter_text) in
+                   delimiter_map.items()}
+    delimiter_map.update(reverse_map)
+
+    return delimiter_map
+
+
+def verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map):
+    """ Verifies if the text contains specific words with expected delimiters between them.
+
+    Args:
+        text (str): The text to search in.
+        anywhere_words (list of str): Words that can appear anywhere in the text.
+        specific_words (list of str): Words that must appear relative to other words in the text.
+        delimiter_map (dict): A dictionary specifying expected delimiters between pairs of specific words.
+
+    Returns:
+        bool: True if all conditions are met, otherwise False.
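+
+    Examples:
+        Mirroring the accompanying tests:
+
+        >>> dmap = construct_delimiter_map("word0,((word1),word2)", ["word0", "word1", "word2"])
+        >>> verify_search_delimiters("((word1),word2), word0", [], ["word0", "word1", "word2"], dmap)
+        True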
+    """
+    locations = defaultdict(list)
+
+    # Check for anywhere words
+    for word in anywhere_words:
+        pattern = r'(?:[ ,()]|^)(' + word + r')(?:[ ,()]|$)'
+        if not any(re.finditer(pattern, text)):
+            return False
+
+    # Find all locations for each word in the text
+    for word in specific_words:
+        for match in re.finditer(r'(?:[ ,()]|^)(' + word + r')(?:[ ,()]|$)', text):
+            start_index = match.start(1)
+            matched_word = match.group(1)
+            locations[word].append((start_index, len(matched_word), word))
+
+    if len(locations) != len(specific_words):
+        return False
+
+    # Generate all possible combinations of word sequences.
+    # This covers cases where the same tag is found twice and both occurrences need to be checked.
+    for sequence in product(*locations.values()):
+        sorted_sequence = sorted(sequence)
+
+        # Check if the delimiters for this sequence match the expected delimiters
+        valid = True
+        for i in range(len(sorted_sequence) - 1):
+            start1, len1, word1 = sorted_sequence[i]
+            start2, len2, word2 = sorted_sequence[i + 1]
+
+            end1 = start1 + len1
+            delimiter_text = text[end1:start2]
+
+            found_delimiter = check_parentheses(delimiter_text)
+            expected_delimiter = delimiter_map.get((word1, word2), None)
+
+            if found_delimiter != expected_delimiter:
+                valid = False
+                break
+
+        if valid:
+            return True  # Return True if any sequence is valid
+
+    return False  # Return False if no valid sequence is found
diff --git a/hed/models/df_util.py b/hed/models/df_util.py
index 0a9373d1..2449e8a7 100644
--- a/hed/models/df_util.py
+++ b/hed/models/df_util.py
@@ -120,6 +120,26 @@ def expand_defs(df, hed_schema, def_dict, columns=None):
             df.loc[mask, column] = df.loc[mask, column].apply(partial(_expand_defs, hed_schema=hed_schema, def_dict=def_dict))
 
 
+def sort_strings(df, hed_schema, tag_form="short_tag", columns=None):
+    """ Expands any def tags found in the dataframe.
+
+        Converts in place
+
+    Parameters:
+        df (pd.Dataframe or pd.Series): The dataframe or series to modify
+        hed_schema (HedSchema or None): The schema to use to identify defs
+        columns (list or None): The columns to modify on the dataframe
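+
+    Example:
+        A minimal usage sketch, assuming hed_schema is an already-loaded HedSchema:
+
+        series = pd.Series(["Sensory-event, Agent-action"])
+        sort_strings(series, hed_schema)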
+    """
+    if isinstance(df, pd.Series):
+        df[:] = df.apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))
+    else:
+        if columns is None:
+            columns = df.columns
+
+        for column in columns:
+            df[column] = df[column].apply(partial(_sort, hed_schema=hed_schema, tag_form=tag_form))
+
+
 def _convert_to_form(hed_string, hed_schema, tag_form):
     return str(HedString(hed_string, hed_schema).get_as_form(tag_form))
 
@@ -132,6 +152,12 @@ def _expand_defs(hed_string, hed_schema, def_dict):
     return str(HedString(hed_string, hed_schema, def_dict).expand_defs())
 
 
+def _sort(hed_string, hed_schema, tag_form):
+    sorted_string = HedString(hed_string, hed_schema)
+    sorted_string.sort()
+    return sorted_string.get_as_form(tag_form)
+
+
 def process_def_expands(hed_strings, hed_schema, known_defs=None, ambiguous_defs=None):
     """ Gather def-expand tags in the strings/compare with known definitions to find any differences
 
diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py
index 736ed625..ab56aa76 100644
--- a/hed/models/expression_parser.py
+++ b/hed/models/expression_parser.py
@@ -1,7 +1,7 @@
 import re
 
 
-class search_result:
+class SearchResult:
     def __init__(self, group, tag):
         self.group = group
         # todo: rename tag: children
@@ -12,7 +12,7 @@ def __init__(self, group, tag):
         self.tags = new_tags
 
     def __eq__(self, other):
-        if isinstance(other, search_result):
+        if isinstance(other, SearchResult):
             return self.group == other.group
         return other == self.group
 
@@ -27,7 +27,7 @@ def merge_result(self, other):
 
         if self.group != other.group:
             raise ValueError("Internal error")
-        return search_result(self.group, new_tags)
+        return SearchResult(self.group, new_tags)
 
     def has_same_tags(self, other):
         if self.group != other.group:
@@ -53,8 +53,6 @@ def get_groups_only(self):
 class Token:
     And = 0
     Tag = 1
-    ContainingGroup = 2
-    ContainingGroupEnd = 3
     DescendantGroup = 4
     DescendantGroupEnd = 5
     Or = 6
@@ -72,8 +70,6 @@ def __init__(self, text):
             ",": Token.And,
             "and": Token.And,
             "or": Token.Or,
-            "[[": Token.ContainingGroup,
-            "]]": Token.ContainingGroupEnd,
             "[": Token.DescendantGroup,
             "]": Token.DescendantGroupEnd,
             "(": Token.LogicalGroup,
@@ -116,6 +112,17 @@ def __init__(self, token, left=None, right=None):
             self._match_mode = 2
             token.text = token.text.replace("*", "")
 
+    def _get_parent_groups(self, search_results):
+        found_parent_groups = []
+        if search_results:
+            for group in search_results:
+                if not group.group.is_group:
+                    continue
+                if group.group._parent:
+                    found_parent_groups.append(SearchResult(group.group._parent, group.group))
+
+        return found_parent_groups
+
     def __str__(self):
         output_str = ""
         if self.left:
@@ -138,16 +145,16 @@ def handle_expr(self, hed_group, exact=False):
             if groups_found:
                 groups_found = []
             else:
-                groups_found = [(None, group) for group in hed_group.get_all_groups()]
+                groups_found = [([], group) for group in hed_group.get_all_groups()]
 
         # If we're checking for all groups, also need to add parents.
         if exact:
-            all_found_groups = [search_result(group, tag) for tag, group in groups_found]
+            all_found_groups = [SearchResult(group, tag) for tag, group in groups_found]
         else:
             all_found_groups = []
             for tag, group in groups_found:
                 while group:
-                    all_found_groups.append(search_result(group, tag))
+                    all_found_groups.append(SearchResult(group, tag))
                     # This behavior makes it eat higher level groups at higher levels
                     tag = group
                     group = group._parent
@@ -170,7 +177,7 @@ def merge_groups(groups1, groups2):
             for other_group in groups2:
                 if group.group is other_group.group:
                     # At this point any shared tags between the two groups invalidates it.
-                    if any(tag is tag2 for tag in group.tags for tag2 in other_group.tags):
+                    if any(tag is tag2 and tag is not None for tag in group.tags for tag2 in other_group.tags):
                         continue
                     merged_result = group.merge_result(other_group)
 
@@ -221,7 +228,7 @@ def handle_expr(self, hed_group, exact=False):
         # Wildcards are only found in containing groups.  I believe this is correct.
         # todo: Is this code still needed for this kind of wildcard?  We already are registering every group, just not
         # every group at every level.
-        all_found_groups = [search_result(group, tag) for tag, group in groups_found]
+        all_found_groups = [SearchResult(group, tag) for tag, group in groups_found]
         return all_found_groups
 
 
@@ -260,73 +267,51 @@ def handle_expr(self, hed_group, exact=False):
         # Todo: this may need more thought with respects to wildcards and negation
         # negated_groups = [group for group in hed_group.get_all_groups() if group not in groups]
         # This simpler version works on python >= 3.9
-        # negated_groups = [search_result(group, []) for group in hed_group.get_all_groups() if group not in groups]
+        # negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups() if group not in groups]
         # Python 3.7/8 compatible version.
-        negated_groups = [search_result(group, []) for group in hed_group.get_all_groups()
+        negated_groups = [SearchResult(group, []) for group in hed_group.get_all_groups()
                           if not any(group is found_group.group for found_group in found_groups)]
 
         return negated_groups
 
 
-class ExpressionContainingGroup(Expression):
-    def handle_expr(self, hed_group, exact=False):
-        result = self.right.handle_expr(hed_group, exact=True)
-        found_groups = result
-        if result:
-            found_parent_groups = []
-            for group in found_groups:
-                if not group.group.is_group:
-                    continue
-                if group.group._parent:
-                    found_parent_groups.append(search_result(group.group._parent, group.group))
-
-            if found_parent_groups:
-                return found_parent_groups
-
-        return []
-
-
 class ExpressionDescendantGroup(Expression):
     def handle_expr(self, hed_group, exact=False):
         found_groups = self.right.handle_expr(hed_group)
-        found_parent_groups = []
-        if found_groups:
-            for group in found_groups:
-                if not group.group.is_group:
-                    continue
-                if group.group._parent:
-                    found_parent_groups.append(search_result(group.group._parent, group.group))
-
-        if found_parent_groups:
-            return found_parent_groups
-        return []
+        found_parent_groups = self._get_parent_groups(found_groups)
+        return found_parent_groups
 
 
 class ExpressionExactMatch(Expression):
+    def __init__(self, token, left=None, right=None):
+        super().__init__(token, left, right)
+        self.optional = "any"
+
+    def _filter_exact_matches(self, search_results):
+        filtered_list = []
+        for group in search_results:
+            if len(group.group.children) == len(group.tags):
+                filtered_list.append(group)
+
+        return filtered_list
+
     def handle_expr(self, hed_group, exact=False):
         found_groups = self.right.handle_expr(hed_group, exact=True)
-        if found_groups:
-            return_list = []
-            for group in found_groups:
-                if len(group.group.children) == len(group.tags):
-                    return_list.append(group)
+        if self.optional == "any":
+            return self._get_parent_groups(found_groups)
 
-            if return_list:
-                return return_list
+        filtered_list = self._filter_exact_matches(found_groups)
+        if filtered_list:
+            return self._get_parent_groups(filtered_list)
 
         # Basically if we don't have an exact match above, do the more complex matching including optional
         if self.left:
             optional_groups = self.left.handle_expr(hed_group, exact=True)
             found_groups = ExpressionAnd.merge_groups(found_groups, optional_groups)
 
-        if found_groups:
-            return_list = []
-            for group in found_groups:
-                if len(group.group.children) == len(group.tags):
-                    return_list.append(group)
-
-            if return_list:
-                return return_list
+        filtered_list = self._filter_exact_matches(found_groups)
+        if filtered_list:
+            return self._get_parent_groups(filtered_list)
 
         return []
 
@@ -337,7 +322,6 @@ class QueryParser:
     def __init__(self, expression_string):
         """Compiles a QueryParser for a particular expression, so it can be used to search hed strings.
 
-
         Basic Input Examples:
 
         'Event' - Finds any strings with Event, or a descendent tag of Event such as Sensory-event
@@ -354,11 +338,15 @@ def __init__(self, expression_string):
 
         '[Event and Action]' - Find a group that contains both Event and Action(at any level)
 
-        '[[Event and Action]]' - Find a group with Event And Action at the same level.
+        '{Event and Action}' - Find a group with Event And Action at the same level.
+
+        '{Event and Action:}' - Find a group with Event And Action at the same level, and nothing else
+
+        '{Event and Action:Agent}' - Find a group with Event And Action at the same level, and optionally an Agent tag.
 
         Practical Complex Example:
 
-        [[{(Onset or Offset), (Def or [[Def-expand]]): ???}]] - A group with an onset tag,
+        {(Onset or Offset), (Def or {Def-expand}): ???} - A group with an onset tag,
                                     a def tag or def-expand group, and an optional wildcard group
 
         Parameters:
@@ -392,15 +380,22 @@ def current_token(self):
 
     def _handle_and_op(self):
         expr = self._handle_negation()
-        next_token = self._next_token_is([Token.And, Token.Or])
+        next_token = self._next_token_is([Token.And])
         while next_token:
             right = self._handle_negation()
             if next_token.kind == Token.And:
                 expr = ExpressionAnd(next_token, expr, right)
-            elif next_token.kind == Token.Or:
-                expr = ExpressionOr(next_token, expr, right)
-            next_token = self._next_token_is([Token.And, Token.Or])
+            next_token = self._next_token_is([Token.And])
+        return expr
 
+    def _handle_or_op(self):
+        expr = self._handle_and_op()  # And binds tighter than Or, so parse and-expressions first
+        next_token = self._next_token_is([Token.Or])
+        while next_token:
+            right = self._handle_and_op()  # And binds tighter than Or, so parse and-expressions first
+            if next_token.kind == Token.Or:
+                expr = ExpressionOr(next_token, expr, right)
+            next_token = self._next_token_is([Token.Or])
         return expr
 
     def _handle_negation(self):
@@ -417,33 +412,35 @@ def _handle_negation(self):
 
     def _handle_grouping_op(self):
         next_token = self._next_token_is(
-            [Token.ContainingGroup, Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
-        if next_token == Token.ContainingGroup:
-            interior = self._handle_and_op()
-            expr = ExpressionContainingGroup(next_token, right=interior)
-            next_token = self._next_token_is([Token.ContainingGroupEnd])
-            if next_token != Token.ContainingGroupEnd:
-                raise ValueError("Parse error: Missing closing square brackets")
-        # Can we move this to the and_or level?  or does that break everything...?
-        elif next_token == Token.LogicalGroup:
-            expr = self._handle_and_op()
+            [Token.LogicalGroup, Token.DescendantGroup, Token.ExactMatch])
+        if next_token == Token.LogicalGroup:
+            expr = self._handle_or_op()
             next_token = self._next_token_is([Token.LogicalGroupEnd])
             if next_token != Token.LogicalGroupEnd:
                 raise ValueError("Parse error: Missing closing paren")
         elif next_token == Token.DescendantGroup:
-            interior = self._handle_and_op()
+            interior = self._handle_or_op()
             expr = ExpressionDescendantGroup(next_token, right=interior)
             next_token = self._next_token_is([Token.DescendantGroupEnd])
             if next_token != Token.DescendantGroupEnd:
                 raise ValueError("Parse error: Missing closing square bracket")
         elif next_token == Token.ExactMatch:
-            interior = self._handle_and_op()
+            interior = self._handle_or_op()
             expr = ExpressionExactMatch(next_token, right=interior)
             next_token = self._next_token_is([Token.ExactMatchEnd, Token.ExactMatchOptional])
             if next_token == Token.ExactMatchOptional:
-                optional_portion = self._handle_and_op()
-                expr.left = optional_portion
+                # We have an optional portion - this now needs to be an exact match
+                expr.optional = "none"
                 next_token = self._next_token_is([Token.ExactMatchEnd])
+                if next_token != Token.ExactMatchEnd:
+                    optional_portion = self._handle_or_op()
+                    expr.left = optional_portion
+                    next_token = self._next_token_is([Token.ExactMatchEnd])
+                if "~" in str(expr):
+                    raise ValueError("Cannot use negation in exact matching groups,"
+                                     " as it's not clear what is being matched.\n"
+                                     "{thing and ~(expression)} is allowed.")
+
             if next_token is None:
                 raise ValueError("Parse error: Missing closing curly bracket")
         else:
@@ -452,13 +449,15 @@ def _handle_grouping_op(self):
                 expr = ExpressionWildcardNew(next_token)
             elif next_token:
                 expr = Expression(next_token)
+            else:
+                expr = None
 
         return expr
 
     def _parse(self, expression_string):
         self.tokens = self._tokenize(expression_string)
 
-        expr = self._handle_and_op()
+        expr = self._handle_or_op()
 
         if self.at_token + 1 != len(self.tokens):
             raise ValueError("Parse error in search string")
diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index f5b381eb..71e21386 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -304,25 +304,30 @@ def test_complex_onsets(self):
                          {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]})
 
     def test_empty_and_single_item_series(self):
-        self.assertEqual(BaseInput._filter_by_index_list([], {}), [])
-        self.assertEqual(BaseInput._filter_by_index_list(["apple"], {0: [0]}), ["apple"])
+        self.assertTrue(BaseInput._filter_by_index_list(pd.Series([]), {}).equals(pd.Series([])))
+        self.assertTrue(BaseInput._filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"])))
 
     def test_two_item_series_with_same_onset(self):
-        self.assertEqual(BaseInput._filter_by_index_list(["apple", "orange"], {0: [0, 1]}), ["apple,orange", "n/a"])
+        input_series = pd.Series(["apple", "orange"])
+        expected_series = pd.Series(["apple,orange", "n/a"])
+        self.assertTrue(BaseInput._filter_by_index_list(input_series, {0: [0, 1]}).equals(expected_series))
 
     def test_multiple_item_series(self):
-        original = ["apple", "orange", "banana", "mango"]
+        input_series = pd.Series(["apple", "orange", "banana", "mango"])
         indexed_dict = {0: [0, 1], 1: [2], 2: [3]}
-        self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["apple,orange", "n/a", "banana", "mango"])
+        expected_series = pd.Series(["apple,orange", "n/a", "banana", "mango"])
+        self.assertTrue(BaseInput._filter_by_index_list(input_series, indexed_dict).equals(expected_series))
 
     def test_complex_scenarios(self):
         # Test with negative, zero and positive onsets
-        original = ["negative", "zero", "positive"]
+        original = pd.Series(["negative", "zero", "positive"])
         indexed_dict = {-1: [0], 0: [1], 1: [2]}
-        self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict), ["negative", "zero", "positive"])
+        expected_series1 = pd.Series(["negative", "zero", "positive"])
+        self.assertTrue(BaseInput._filter_by_index_list(original, indexed_dict).equals(expected_series1))
 
         # Test with more complex indexed_dict
-        original = ["apple", "orange", "banana", "mango", "grape"]
-        indexed_dict = {0: [0, 1], 1: [2], 2: [3, 4]}
-        self.assertEqual(BaseInput._filter_by_index_list(original, indexed_dict),
-                         ["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+        original2 = ["apple", "orange", "banana", "mango", "grape"]
+        indexed_dict2= {0: [0, 1], 1: [2], 2: [3, 4]}
+        expected_series2 = pd.Series(["apple,orange", "n/a", "banana", "mango,grape", "n/a"])
+        self.assertTrue(BaseInput._filter_by_index_list(original2, indexed_dict2).equals(expected_series2))
+
diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py
new file mode 100644
index 00000000..0a942b93
--- /dev/null
+++ b/tests/models/test_basic_search.py
@@ -0,0 +1,313 @@
+import unittest
+import pandas as pd
+from hed import load_schema_version
+
+import os
+from hed import TabularInput
+from hed.models import df_util, basic_search
+from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \
+    construct_delimiter_map, verify_search_delimiters, find_matching
+import numpy as np
+
+
+class TestNewSearch(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                                       '../data/bids_tests/eeg_ds003645s_hed'))
+        sidecar1_path = os.path.realpath(os.path.join(bids_root_path, 'task-FacePerception_events.json'))
+        cls.events_path = os.path.realpath(
+            os.path.join(bids_root_path, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
+        cls.base_input = TabularInput(cls.events_path, sidecar1_path)
+        cls.schema = load_schema_version()
+        cls.df = cls.base_input.series_filtered
+
+    def test_find_matching_results(self):
+        result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
+        result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
+
+        # Add assertions
+        self.assertTrue(np.sum(result1) > 0, "result1 should have some true values")
+        self.assertTrue(np.sum(result2) > 0, "result2 should have some true values")
+        self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2")
+
+
+class TestFindWords(unittest.TestCase):
+    def test_basic(self):
+        search_string = "@global (local1, local2)"
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, ['global'])
+        self.assertEqual(specific_words, ['local1', 'local2'])
+
+    def test_no_anywhere_words(self):
+        search_string = "(local1, local2)"
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, [])
+        self.assertEqual(specific_words, ['local1', 'local2'])
+
+    def test_no_specific_words(self):
+        search_string = "@global1, @global2"
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, ['global1', 'global2'])
+        self.assertEqual(specific_words, [])
+
+    def test_empty_string(self):
+        search_string = ""
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, [])
+        self.assertEqual(specific_words, [])
+
+    def test_mixed_words(self):
+        search_string = "@global (local1, local2), @another_global"
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, ['global', 'another_global'])
+        self.assertEqual(specific_words, ['local1', 'local2'])
+
+    def test_whitespace(self):
+        search_string = " @Global ,  ( local1 , local2 ) "
+        anywhere_words, specific_words = find_words(search_string)
+        self.assertEqual(anywhere_words, ['Global'])
+        self.assertEqual(specific_words, ['local1', 'local2'])
+
+
+class TestCheckParentheses(unittest.TestCase):
+    def test_balanced_parentheses(self):
+        self.assertEqual(check_parentheses("(())"), "")
+        self.assertEqual(check_parentheses("(someText())"), "")
+        self.assertEqual(check_parentheses("((some)text())"), "")
+        self.assertEqual(check_parentheses("()"), "")
+
+    def test_unbalanced_parentheses(self):
+        self.assertEqual(check_parentheses("(()"), "(")
+        self.assertEqual(check_parentheses("()someText("), "(")
+        self.assertEqual(check_parentheses("(text)text)"), ")")
+        self.assertEqual(check_parentheses("text)"), ")")
+
+    def test_mixed_parentheses(self):
+        self.assertEqual(check_parentheses("(()(())"), "(")
+        self.assertEqual(check_parentheses("(someText))((someText)"), ")(")
+        self.assertEqual(check_parentheses("((someText))someText"), "")
+        self.assertEqual(check_parentheses("(someText(someText))someText"), "")
+
+    def test_special_cases(self):
+        self.assertEqual(check_parentheses(""), "")
+        self.assertEqual(check_parentheses("abc"), "")
+        self.assertEqual(check_parentheses("((()))("), "(")
+        self.assertEqual(check_parentheses("text"), "")
+
+    def test_reverse_and_flip_parentheses(self):
+        self.assertEqual(reverse_and_flip_parentheses("(abc)"), "(cba)")
+        self.assertEqual(reverse_and_flip_parentheses("Hello()"), "()olleH")
+        self.assertEqual(reverse_and_flip_parentheses(")("), ")(")
+        self.assertEqual(reverse_and_flip_parentheses("((()))"), "((()))")
+        self.assertEqual(reverse_and_flip_parentheses("()()()"), "()()()")
+        self.assertEqual(reverse_and_flip_parentheses("abc"), "cba")
+        self.assertEqual(reverse_and_flip_parentheses("123(abc)321"), "123(cba)321")
+        self.assertEqual(reverse_and_flip_parentheses("a(bc)d"), "d(cb)a")
+
+
+class TestConstructDelimiterMap(unittest.TestCase):
+    def test_empty_text(self):
+        self.assertEqual(construct_delimiter_map("", ["word1", "word2"]), {})
+
+    def test_empty_words(self):
+        self.assertEqual(construct_delimiter_map("word1,word2", []), {})
+
+    def test_single_occurrence(self):
+        text = "word1,word2"
+        expected_result = {
+            ("word1", "word2"): "",
+            ("word2", "word1"): ""
+        }
+        self.assertEqual(construct_delimiter_map(text, ["word1", "word2"]), expected_result)
+
+    def test_multiple_words(self):
+        text = "word0,((word1),word2)"
+        expected_result = {
+            ("word0", "word1"): "((",
+            ("word0", "word2"): "(",
+            ("word1", "word0"): "))",
+            ("word1", "word2"): ")",
+            ("word2", "word1"): "(",
+            ("word2", "word0"): ")"
+        }
+        self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+        text = "word0 , ( (word1 ), word2)"
+        self.assertEqual(construct_delimiter_map(text, ["word0", "word1", "word2"]), expected_result)
+
+
+class TestVerifyDelimiters(unittest.TestCase):
+    def base_verify_func(self, query_text, text, anywhere_words, specific_words, expected_result):
+        delimiter_map = construct_delimiter_map(query_text, specific_words)
+        actual_result = verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map)
+        self.assertEqual(actual_result, expected_result)
+
+    def test_all_conditions_met(self):
+        query_text = "word0,((word1),word2)"
+        specific_words = ["word0", "word1", "word2"]
+        text = "word0,((word1),word2)"
+        self.base_verify_func(query_text, text, [], specific_words, True)
+        text = "((word1),word2), word0"
+        self.base_verify_func(query_text, text, [], specific_words, True)
+        text = "word0,(word2, (word1))"
+        self.base_verify_func(query_text, text, [], specific_words, True)
+        text = "word0,((word1),(ExtraGroup),word2)"
+        self.base_verify_func(query_text, text, [], specific_words, True)
+        text = "word0,((word2),word1)"
+        self.base_verify_func(query_text, text, [], specific_words, False)
+        text = "((word1),word0), word2"
+        self.base_verify_func(query_text, text, [], specific_words, False)
+        text = "word0,((word1))"
+        self.base_verify_func(query_text, text, [], specific_words, False)
+        text = "(word1),(ExtraGroup),word2)"
+        self.base_verify_func(query_text, text, [], specific_words, False)
+
+    def test_complex_case_with_word_identifiers(self):
+        query_text = "word0,((word1),@word2,@word3,word4)"
+        specific_words = ["word0", "word1", "word4"]
+        anywhere_words = ["word2", "word3"]
+        text = "word0,((word1),word2,word3,word4)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+        text = "word2,word0,((word1),word3,word4)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+        text = "word3,((word1),word2,word4),word0"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+        text = "word0,((word1),word4),word2,word3"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+        text = "word0,word1,word4,word2"  # Incorrect delimiters
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+        text = "word2,word3"  # Missing specific words
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+    def test_very_complex_case_with_word_identifiers(self):
+        query_text = "word0,(((word1,word2),@word3)),((word4,word5)))"
+        specific_words = ["word0", "word1", "word2", "word4", "word5"]
+        anywhere_words = ["word3"]
+
+        # Test case where all conditions are met
+        text = "word0,(((word1,word2),word3)),((word4,word5)))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Test case with anywhere words out of specific context but still in the string
+        text = "word3,word0,(((word1,word2))),((word4,word5)))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Test case with correct specific words but incorrect delimiters
+        text = "word0,((word1,word2),word3),(word4,word5)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+        # Test case missing one specific word
+        text = "word0,(((word1,word2),word3)),(word4))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+        # Test case missing anywhere word
+        text = "word0,(((word1,word2))),((word4,word5)))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+    def test_incorrect_single_delimiter(self):
+        query_text = "word0,((word1)),word2"
+        specific_words = ["word0", "word1", "word2"]
+        anywhere_words = []
+
+        # Positive case 1: Exact match
+        text = "word0,((word1)),word2"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Positive case 2: Additional parentheses around the entire sequence
+        text = "(word0,((word1)),word2)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Single closing parenthesis missing between word1 and word2
+        text = "word0,((word1),word2)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+        # Single opening parenthesis missing between word0 and word1
+        text = "word0,(word1)),word2"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+    def test_mismatched_parentheses(self):
+        query_text = "word0,((word1)),(word2,word3)"
+        specific_words = ["word0", "word1", "word2", "word3"]
+        anywhere_words = []
+
+        # Positive case 1: Exact match
+        text = "word0,((word1)),(word2,word3)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Positive case 2: Reordered sequence with the same delimiters
+        text = "(word2,word3),word0,((word1))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Positive case 3: Additional text in between but the delimiters remain the same
+        text = "word0,someExtraText,((word1)),someMoreText,(word2,word3)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
+
+        # Extra closing parenthesis between word2 and word3
+        text = "word0,((word1),(word2,word3))"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+        # Extra opening parenthesis between word1 and word2
+        text = "word0,((word1),((word2,word3)"
+        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+
+    def test_wildcard_matching_verify_delimiters(self):
+        query_text = "word0, ((word1.*?)), word2.*?"
+        delimiter_map = construct_delimiter_map(query_text, ["word0", "word1.*?", "word2.*?"])
+
+        # Positive test cases
+        text = "((word1)), word0, word2X"
+        self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+        text = "word0, ((word1Y)), word2Z"
+        self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+        # Negative test cases
+        text = "word0, (word1), word2"
+        self.assertFalse(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
+
+class TestFindMatching(unittest.TestCase):
+    def base_find_matching(self, series, search_string, expected):
+        mask = find_matching(series, search_string)
+        self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}")
+
+    def test_basic_matching(self):
+        series = pd.Series([
+            "(word1), word0, ((word2))",
+            "word0, ((word1)), word2",
+            "(word1), word0, (word2)"
+        ])
+        search_string = "word0, ((word1)), word2"
+        expected = pd.Series([False, True, False])
+        self.base_find_matching(series, search_string, expected)
+
+    def test_anywhere_words(self):
+        series = pd.Series([
+            "(word1), word0, ((word2))",
+            "word0, ((word1)), word2",
+            "word0, (word3), ((word1)), word2"
+        ])
+        search_string = "@word3, word0, ((word1)), word2"
+        expected = pd.Series([False, False, True])
+        self.base_find_matching(series, search_string, expected)
+
+    def test_mismatched_parentheses(self):
+        series = pd.Series([
+            "(word1), word0, ((word2))",
+            "word0, ((word1)), word2",
+            "word0, (word1)), word2",
+            "word0, ((word1), word2"
+        ])
+        search_string = "word0, ((word1)), word2"
+        expected = pd.Series([False, True, False, False])
+        self.base_find_matching(series, search_string, expected)
+
+    def test_wildcard_matching(self):
+        series = pd.Series([
+            "word2, word0, ((word1X))",
+            "word0, ((word1Y)), word2Z",
+            "word0, (word1), word2"
+        ])
+        search_string = "word0, ((word1*)), word2*"
+        expected = pd.Series([True, True, False])
+        self.base_find_matching(series, search_string, expected)
diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py
index cca54411..5bdb71b7 100644
--- a/tests/models/test_expression_parser.py
+++ b/tests/models/test_expression_parser.py
@@ -118,7 +118,7 @@ def test_finding_tags2(self):
             "Agent, (Event)": True,
             "(Item), (Event)": True
         }
-        self.base_test("(Item or Agent) and [[Action or Event]]", test_strings)
+        self.base_test("(Item or Agent) and {Action or Event}", test_strings)
 
     def test_exact_group(self):
         test_strings = {
@@ -131,7 +131,7 @@ def test_exact_group(self):
             "(A, B, (C, D))": True,
             "(A, B, C)": True
         }
-        self.base_test("[[a, b]]", test_strings)
+        self.base_test("{a, b}", test_strings)
 
     def test_exact_group_simple_complex(self):
         test_strings = {
@@ -145,7 +145,7 @@ def test_exact_group_simple_complex(self):
             "(E, F, (A, B, (C, D)))": True,
             "(A, B, (E, F, (C, D)))": False,  # TODO: Should this be True?  [[c]] isn't directly inside an a group.
         }
-        self.base_test("[[a, [[c]] ]]", test_strings)
+        self.base_test("{a, {c} }", test_strings)
 
     def test_exact_group_complex(self):
         test_strings = {
@@ -155,7 +155,7 @@ def test_exact_group_complex(self):
             "(A, B, ((C, D)))": False,
             "(E, F, (A, B, (C, D)))": True,
         }
-        self.base_test("[[a, b, [[c, d]] ]]", test_strings)
+        self.base_test("{a, b, {c, d} }", test_strings)
 
     def test_duplicate_search(self):
         test_strings = {
@@ -183,7 +183,7 @@ def test_exact_group_complex_split(self):
             "(E, F, (A, B, (C, D)))": False,
             "((A, B), (C, D))": True,
         }
-        self.base_test("[[ [[a, b]], [[c, d]] ]]", test_strings)
+        self.base_test("{ {a, b}, {c, d} }", test_strings)
 
     def test_mixed_group_split(self):
         test_strings = {
@@ -192,7 +192,7 @@ def test_mixed_group_split(self):
             "((Event), ((Clear-throat)))": True,
             "((Event, Clear-throat))": False,
         }
-        self.base_test("[[ [Event], [Action] ]]", test_strings)
+        self.base_test("{ [Event], [Action] }", test_strings)
 
     def test_exact_group_split(self):
         test_strings = {
@@ -201,7 +201,7 @@ def test_exact_group_split(self):
             "((Event), ((Clear-throat)))": False,
             "((Event, Clear-throat))": False,
         }
-        self.base_test("[[ [[Event]], [[Action]] ]]", test_strings)
+        self.base_test("{ {Event}, {Action} }", test_strings)
 
     def test_exact_group_split_or(self):
         test_strings = {
@@ -210,17 +210,18 @@ def test_exact_group_split_or(self):
             "((A), ((D)))": True,
             "((A, D))": True,
         }
-        self.base_test("[[ [[a]] or [[d]] ]]", test_strings)
+        self.base_test("{ {a} or {d} }", test_strings)
 
     def test_exact_group_split_or_negation(self):
         test_strings = {
-            "(Event, Clear-throat)": False,
+            # "(Event, Clear-throat)": False,
             "((Event), (Clear-throat))": True,
             "((Event))": False,
             "((Event), ((Clear-throat)))": True,
             "((Event, Clear-throat))": False,
         }
-        self.base_test("[[ [[~Event]] ]]", test_strings)
+        # Need to think this through more.  How do you exact match a negative tag?
+        self.base_test("{ {~Event} }", test_strings)
 
     def test_exact_group_split_or_negation_dual(self):
         test_strings = {
@@ -233,7 +234,7 @@ def test_exact_group_split_or_negation_dual(self):
             "((A), (B, C))": False,
             "((A), ((B), C))": True,
         }
-        self.base_test("[[ [[~a and ~b]] ]]", test_strings)
+        self.base_test("{ {~a and ~b} }", test_strings)
 
     def test_exact_group_split_or_negation_dual2(self):
         test_strings = {
@@ -246,7 +247,7 @@ def test_exact_group_split_or_negation_dual2(self):
             "((A), (B, C))": False,
             "((A), ((B), C))": True,
         }
-        self.base_test("[[ [[~(a or b)]] ]]", test_strings)
+        self.base_test("{ {~(a or b)} }", test_strings)
 
     def test_exact_group_split_or_negation_complex(self):
         test_strings = {
@@ -260,7 +261,7 @@ def test_exact_group_split_or_negation_complex(self):
             "((A), (B, C)), (D)": False,
             "((A), (B, C)), (H)": False,
         }
-        self.base_test("[[ [[~(a or b)]] ]] and [[D or ~F]]", test_strings)
+        self.base_test("{ {~(a or b)} } and {D or ~F}", test_strings)
 
     # TODO: Should this work, and what should it mean?
     # Right now this is always true, since there is at least one group without ", (a)" in every string.
@@ -272,7 +273,7 @@ def test_exact_group_negation(self):
             "((A), ((D)))": True,
             "((A, D))": True,
         }
-        self.base_test("[[ ~[[a]] ]]", test_strings)
+        self.base_test("{ ~{a} }", test_strings)
 
     def test_exact_group_negation2(self):
         test_strings = {
@@ -282,9 +283,42 @@ def test_exact_group_negation2(self):
             "((A), ((D, B)))": True,
             "((A, D))": False,
             "(B, (D))": True,
-            "(B)": True
+            "(B)": True,
+            "((A), B)": False
         }
-        self.base_test("[[ ~[[a]], b]]", test_strings)
+        self.base_test("{ ~{a}, b}", test_strings)
+
+    def test_exact_group_negation3(self):
+        test_strings = {
+            "(A, D, B)": False,
+            "((A), (D), B)": True,
+            "((A))": False,
+            "((A), ((D, B)))": True,
+            "((A, D))": False,
+            "(B, (D))": True,
+            "(B)": True,
+            "((A), B)": True
+        }
+        self.base_test("{ ~a and b}", test_strings)
+
+    def test_exact_group_negation4(self):
+        test_strings = {
+            "(A, D, B)": False,
+            "((A), (D), B)": False,
+            "((A))": False,
+            "((A), ((D, B)))": False,
+            "((A, D))": False,
+            "(B)": True,
+            "(B, (D))": True,
+            "((A), B)": False
+        }
+        self.base_test("{ @c and @a and b: ???}", test_strings)
+
+    def test_exact_group_negation5(self):
+        test_string = "{ ~a and b:}"
+        with self.assertRaises(ValueError) as context:
+            QueryParser(test_string)
+        self.assertTrue(context.exception.args[0])
 
     def test_mixed_group_complex_split(self):
         test_strings = {
@@ -297,7 +331,7 @@ def test_mixed_group_complex_split(self):
             "((A, B), (C, D))": True,
             "((A, B, C, D))": False,
         }
-        self.base_test("[[ [a, b], [c, d] ]]", test_strings)
+        self.base_test("{ [a, b], [c, d] }", test_strings)
 
     def test_exact_group_complex2(self):
         test_strings = {
@@ -309,7 +343,7 @@ def test_exact_group_complex2(self):
             "(B, (C)), (A, B, (C))": True,
             "(A, B, (A, (C)))": False
         }
-        self.base_test("[[a, b, [[c]] ]]", test_strings)
+        self.base_test("{a, b, {c} }", test_strings)
 
     def test_containing_group_complex2(self):
         test_strings = {
@@ -362,13 +396,13 @@ def test_mixed_groups(self):
         test_strings = {
             "(A, B), (C, D, (E, F))": True
         }
-        self.base_test("[[a]], [[ [[e, f]] ]]", test_strings)
+        self.base_test("{a}, { {e, f} }", test_strings)
 
         test_strings = {
             "(A, B), (C, D, (E, F))": False
         }
         # This example works because it finds the group containing (c, d, (e, f)), rather than the ef group
-        self.base_test("[[a]], [e, [[f]] ]", test_strings)
+        self.base_test("{a}, [e, {f} ]", test_strings)
 
     def test_and(self):
         test_strings = {
@@ -411,18 +445,17 @@ def test_and_wildcard_nothing_else(self):
             "A": False,
             "B": False,
             "C": False,
-            "A, B": True,
+            "A, B": False,
             "A, C": False,
             "B, C": False,
             "A, B, C": False,
             "D, A, B": False,
             "A, B, (C)": False,
             "(A, B), C": True,
-            "(A, B, C)": False,
+            "(A, B, C)": True,
         }
         self.base_test("{a and b}", test_strings)
 
-    def test_and_wildcard_nothing_else2(self):
         test_strings = {
             "A": False,
             "B": False,
@@ -436,8 +469,7 @@ def test_and_wildcard_nothing_else2(self):
             "(A, B), C": True,
             "(A, B, C)": False,
         }
-        self.base_test("[{a and b}]", test_strings)
-        self.base_test("[[{a and b}]]", test_strings)
+        self.base_test("{a and b:}", test_strings)
 
     def test_and_logical_wildcard(self):
         test_strings = {
@@ -450,9 +482,11 @@ def test_and_logical_wildcard(self):
         self.base_test("A, B and ?", test_strings)
 
         test_strings = {
-            "A": False,
+            "A": True,
             "A, C": True,
             "A, B, C": True,
+            "B, C": False,
+            "B, C, D, E": True
         }
         self.base_test("(a or (b and c) and ?)", test_strings)
 
@@ -469,7 +503,7 @@ def test_double_wildcard(self):
 
     def test_or_wildcard(self):
         test_strings = {
-            "A": False,
+            "A": True,
             "B": False,
             "C": False,
             "A, B": True,
@@ -589,10 +623,10 @@ def test_and_or(self):
         self.base_test("a and (b or c)", test_strings)
 
         test_strings = {
-            "A": False,
+            "A": True,
             "B": False,
             "C": False,
-            "A, B": False,
+            "A, B": True,
             "A, C": True,
             "B, C": True
         }
@@ -698,35 +732,43 @@ def test_not_in_line3(self):
 
     def test_optional_exact_group(self):
         test_strings = {
-            "A, C": True,
+            "(A, C)": True,
         }
         self.base_test("{a and (b or c)}", test_strings)
 
         test_strings = {
-            "A, B, C, D": True,
+            "(A, B, C, D)": True,
         }
         self.base_test("{a and b: c and d}", test_strings)
 
         test_strings = {
-            "A, B, C": True,
-            "A, B, C, D": False,
+            "(A, B, C)": True,
+            "(A, B, C, D)": False,
         }
         self.base_test("{a and b: c or d}", test_strings)
 
         test_strings = {
-            "A, C": True,
-            "A, D": True,
-            "A, B, C": False,
-            "A, B, C, D": False,
+            "(A, C)": True,
+            "(A, D)": True,
+            "(A, B, C)": False,
+            "(A, B, C, D)": False,
         }
         self.base_test("{a or b: c or d}", test_strings)
 
         test_strings = {
             "(Onset, (Def-expand/taco))": True,
+            "(Onset, Def-expand/taco)": False,
+            "(Onset, Def/taco, (Def-expand/taco))": True, # this one validates
+            "(Onset, (Def/taco))": False,
             "(Onset, (Def-expand/taco, (Label/DefContents)))": True,
             "(Onset, (Def-expand/taco), (Label/OnsetContents))": True,
             "(Onset, (Def-expand/taco), (Label/OnsetContents, Description/MoreContents))": True,
             "Onset, (Def-expand/taco), (Label/OnsetContents)": False,
             "(Onset, (Def-expand/taco), Label/OnsetContents)": False,
         }
-        self.base_test("[[{(Onset or Offset), (Def or [[Def-expand]]): ???}]]", test_strings)
\ No newline at end of file
+        self.base_test("{(Onset or Offset), (Def or {Def-expand}): ???}", test_strings)
+        test_strings = {
+            "(A, B)": True,
+            "(A, B, C)": True
+        }
+        self.base_test("{a or b}", test_strings)
\ No newline at end of file
diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py
index f3591ead..c21d839a 100644
--- a/tests/schema/test_hed_schema_io.py
+++ b/tests/schema/test_hed_schema_io.py
@@ -170,6 +170,8 @@ def _base_merging_test(self, files):
                     reload1 = load_schema(path1)
                     reload2 = load_schema(path2)
                     self.assertEqual(reload1, reload2)
+                except Exception:
+                    self.fail("Saving and reloading the schema raised an unexpected exception")
                 finally:
                     os.remove(path1)
                     os.remove(path2)
@@ -183,6 +185,8 @@ def _base_merging_test(self, files):
                     reload1 = load_schema(path1)
                     reload2 = load_schema(path2)
                     self.assertEqual(reload1, reload2)
+                except Exception:
+                    self.fail("Saving and reloading the schema raised an unexpected exception")
                 finally:
                     os.remove(path1)
                     os.remove(path2)

From ab2a25cd9b00ff0be4d7f311b707af2ace4ebf99 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 28 Sep 2023 18:16:38 -0500
Subject: [PATCH 02/21] Update schema validation and tests: validate most
 attribute values, update error codes to better match the spec, require all
 sections in mediawiki schemas, misc minor fixes

---
 hed/errors/error_types.py                     | 11 ++-
 hed/errors/exceptions.py                      | 17 ++--
 hed/errors/known_error_codes.py               |  1 +
 hed/errors/schema_error_messages.py           | 39 +++++---
 hed/schema/hed_schema.py                      |  4 +-
 hed/schema/hed_schema_constants.py            |  1 +
 hed/schema/hed_schema_entry.py                | 16 ++--
 hed/schema/schema_attribute_validators.py     | 64 ++++++++++++-
 hed/schema/schema_compliance.py               | 53 +++++------
 hed/schema/schema_io/schema2base.py           |  5 -
 hed/schema/schema_io/wiki2schema.py           | 95 +++++++++++++------
 hed/schema/schema_validation_util.py          | 12 ++-
 spec_tests/hed-specification                  |  2 +-
 spec_tests/test_errors.py                     | 50 +---------
 .../issues_tests/HED_badroot_0.0.1.mediawiki  | 10 ++
 .../HED_dupesubroot_0.0.1.mediawiki           | 10 ++
 .../issues_tests/HED_root_invalid1.mediawiki  | 10 ++
 .../issues_tests/HED_root_invalid2.mediawiki  | 10 ++
 .../issues_tests/HED_root_invalid3.mediawiki  | 10 ++
 .../HED_root_wrong_place_0.0.1.mediawiki      | 10 ++
 .../issues_tests/overlapping_tags1.mediawiki  |  7 ++
 .../issues_tests/overlapping_tags2.mediawiki  |  8 ++
 .../issues_tests/overlapping_tags3.mediawiki  |  8 ++
 .../issues_tests/overlapping_tags4.mediawiki  |  8 ++
 .../overlapping_unit_classes.mediawiki        |  7 ++
 .../issues_tests/overlapping_units.mediawiki  |  7 ++
 .../merge_tests/sorted_root.mediawiki         | 10 ++
 .../wiki_tests/HED_default.mediawiki          | 14 ++-
 .../wiki_tests/attribute_unknown1.mediawiki   | 41 ++++++++
 .../validator_tests/bids_schema.mediawiki     |  5 +
 tests/schema/test_hed_schema_io.py            |  8 +-
 tests/schema/test_schema_wiki_fatal_errors.py | 54 ++++++-----
 tests/schema/util_create_schemas.py           | 23 ++++-
 33 files changed, 445 insertions(+), 185 deletions(-)
 create mode 100644 tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
index 7305e7c6..a866ec32 100644
--- a/hed/errors/error_types.py
+++ b/hed/errors/error_types.py
@@ -107,7 +107,7 @@ class SidecarErrors:
 
 class SchemaErrors:
     SCHEMA_DUPLICATE_NODE = 'SCHEMA_DUPLICATE_NODE'
-    SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+
     SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
 
 
@@ -119,19 +119,22 @@ class SchemaWarnings:
     SCHEMA_CHARACTER_INVALID = "SCHEMA_CHARACTER_INVALID"
     SCHEMA_INVALID_CAPITALIZATION = 'invalidCaps'
     SCHEMA_NON_PLACEHOLDER_HAS_CLASS = 'SCHEMA_NON_PLACEHOLDER_HAS_CLASS'
-    SCHEMA_INVALID_ATTRIBUTE = "SCHEMA_INVALID_ATTRIBUTE"
 
 
 class SchemaAttributeErrors:
+    SCHEMA_ATTRIBUTE_INVALID = 'SCHEMA_ATTRIBUTE_INVALID'
+    SCHEMA_ATTRIBUTE_VALUE_INVALID = 'SCHEMA_ATTRIBUTE_VALUE_INVALID'
     SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID"
     SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID"
-    SCHEMA_RELATED_TAG_INVALID = "SCHEMA_RELATED_TAG_INVALID"
 
     SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID"
     SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID"
+    SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID"
+    SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID"
 
     SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID"
-    SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" # Reported as SCHEMA_DEPRECATED_INVALID
+    SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED"
+    SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE"
 
 
 class DefinitionErrors:
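
The upshot of this regrouping is that attribute problems are now reported under the umbrella codes SCHEMA_ATTRIBUTE_INVALID and SCHEMA_ATTRIBUTE_VALUE_INVALID. A short sketch of filtering compliance output by the new code, using check_compliance as declared later in this patch (the schema path is hypothetical):

    from hed import load_schema
    from hed.schema.schema_compliance import check_compliance

    schema = load_schema("my_library.mediawiki")   # hypothetical file
    issues = check_compliance(schema)
    value_issues = [i for i in issues if i["code"] == "SCHEMA_ATTRIBUTE_VALUE_INVALID"]
    print(f"{len(value_issues)} attribute-value issues found")
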
diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py
index e7ee857b..e368ec43 100644
--- a/hed/errors/exceptions.py
+++ b/hed/errors/exceptions.py
@@ -14,8 +14,9 @@ class HedExceptions:
     INVALID_DATAFRAME = 'INVALID_DATAFRAME'
     INVALID_FILE_FORMAT = 'INVALID_FILE_FORMAT'
     # These are actual schema issues, not that the file cannot be found or parsed
-    SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
-    HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
+    SCHEMA_HEADER_MISSING = 'SCHEMA_HEADER_INVALID'
+    SCHEMA_HEADER_INVALID = 'SCHEMA_HEADER_INVALID'
+    SCHEMA_UNKNOWN_HEADER_ATTRIBUTE = "SCHEMA_HEADER_INVALID"
 
     SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
     BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
@@ -26,14 +27,14 @@ class HedExceptions:
     ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
     IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"
 
-    HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
-    SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
-    SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
-    HED_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
-    INVALID_SECTION_SEPARATOR = 'invalidSectionSeparator'
+    SCHEMA_VERSION_INVALID = 'SCHEMA_VERSION_INVALID'
+    SCHEMA_SECTION_MISSING = 'SCHEMA_SECTION_MISSING'
+
+    WIKI_SEPARATOR_INVALID = 'invalidSectionSeparator'
 
     # This issue will contain a list of lines with issues.
-    HED_WIKI_DELIMITERS_INVALID = 'HED_WIKI_DELIMITERS_INVALID'
+    WIKI_DELIMITERS_INVALID = 'WIKI_DELIMITERS_INVALID'
+    WIKI_LINE_START_INVALID = 'WIKI_LINE_START_INVALID'
     HED_SCHEMA_NODE_NAME_INVALID = 'HED_SCHEMA_NODE_NAME_INVALID'
 
     SCHEMA_DUPLICATE_PREFIX = 'schemaDuplicatePrefix'
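
Because several constants now alias the same spec-facing string (three different names all map to 'SCHEMA_HEADER_INVALID', for instance), consumers should branch on the code value carried by the exception rather than assume a one-to-one constant. A sketch of the consumer side (the file path is hypothetical; .code, .filename, and the issues list are the attributes the tests later in this patch rely on):

    from hed import load_schema
    from hed.errors import HedFileError, HedExceptions

    try:
        load_schema("broken_schema.mediawiki")     # hypothetical file
    except HedFileError as e:
        print(e.code, e.filename, e.args[0])
        for issue in getattr(e, "issues", []):     # per-line details, when provided
            print(issue)
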
diff --git a/hed/errors/known_error_codes.py b/hed/errors/known_error_codes.py
index b72e8470..b8962682 100644
--- a/hed/errors/known_error_codes.py
+++ b/hed/errors/known_error_codes.py
@@ -31,6 +31,7 @@
     ],
     "schema_validation_errors": [
         "SCHEMA_ATTRIBUTE_INVALID",
+        "SCHEMA_ATTRIBUTE_VALUE_INVALID",
         "SCHEMA_CHARACTER_INVALID",
         "SCHEMA_DUPLICATE_NODE",
         "SCHEMA_HEADER_INVALID",
diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py
index b7fda9d5..8c196f9e 100644
--- a/hed/errors/schema_error_messages.py
+++ b/hed/errors/schema_error_messages.py
@@ -16,7 +16,7 @@ def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
            f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
 
 
-@hed_error(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID)
 def schema_error_unknown_attribute(attribute_name, source_tag):
     return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
            f"or was used outside of it's defined class."
@@ -40,45 +40,58 @@ def schema_warning_SCHEMA_INVALID_CAPITALIZATION(tag_name, problem_char, char_in
            f"Found character '{problem_char}' in tag '{tag_name}' at position {char_index}."
 
 
-@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING)
+@hed_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, default_severity=ErrorSeverity.WARNING,
+           actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
     return "Only placeholder nodes('#') can have a unit class, value class, or takes value." + \
            f"Found {invalid_attribute_name} on {tag_name}"
 
 
-@hed_error(SchemaWarnings.SCHEMA_INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
-def schema_error_SCHEMA_INVALID_ATTRIBUTE(tag_name, invalid_attribute_name):
-    return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'." \
-           f"Something went very wrong."
 
-
-@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_DEPRECATED_INVALID(tag_name, invalid_deprecated_version):
     return f"'{tag_name}' has invalid or unknown value in attribute deprecatedFrom: '{invalid_deprecated_version}'."
 
 
 @hed_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
-           actual_code=SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)
+           actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_CHILD_OF_DEPRECATED(deprecated_tag, non_deprecated_child):
     return f"Deprecated tag '{deprecated_tag}' has a child that is not deprecated: '{non_deprecated_child}'."
 
 
-@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name):
     return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'."
 
 
-@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name):
     return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
 
 
-@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name):
     return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'."
 
 
-@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)
+@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
 def schema_error_SCHEMA_DEFAULT_UNITS_INVALID(tag, bad_unit, valid_units):
     valid_units = ",".join(valid_units)
     return f"Tag '{tag}' has an invalid defaultUnit '{bad_unit}'.  Valid units are: '{valid_units}'."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE(tag, conversion_factor):
+    return f"Tag '{tag}' has an invalid conversionFactor '{conversion_factor}'.  Conversion factor must be positive."
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_ALLOWED_CHARACTERS_INVALID(tag, invalid_character):
+    return (f"Tag '{tag}' has an invalid allowedCharacter: '{invalid_character}'.  "
+            f"Allowed characters are: a single character, "
+            f"or one of the following - letters, blank, digits, alphanumeric.")
+
+
+@hed_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID)
+def schema_error_SCHEMA_IN_LIBRARY_INVALID(tag, bad_library):
+    return (f"Tag '{tag}' has an invalid inLibrary: '{bad_library}'.  ")
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
index 4cb3729c..0857abe9 100644
--- a/hed/schema/hed_schema.py
+++ b/hed/schema/hed_schema.py
@@ -731,7 +731,9 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
         # Add the InLibrary attribute to any library schemas as they are loaded
         # These are later removed when they are saved out, if saving unmerged
         if self.library and (not self.with_standard or (not self.merged and self.with_standard)):
-            new_entry._set_attribute_value(HedKey.InLibrary, self.library)
+            # Only add the attribute if it is not already present - a rare case.
+            if not new_entry.has_attribute(HedKey.InLibrary):
+                new_entry._set_attribute_value(HedKey.InLibrary, self.library)
 
         section = self._sections[key_class]
         return section._add_to_dict(long_tag_name, new_entry)
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
index 0cecc4ab..60a1a934 100644
--- a/hed/schema/hed_schema_constants.py
+++ b/hed/schema/hed_schema_constants.py
@@ -42,6 +42,7 @@ class HedKey:
     SuggestedTag = "suggestedTag"
     Rooted = "rooted"
     DeprecatedFrom = "deprecatedFrom"
+    ConversionFactor = "conversionFactor"
 
     # All known properties
     BoolProperty = 'boolProperty'
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 102795d8..936943e8 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -176,7 +176,6 @@ def __eq__(self, other):
             return False
         return True
 
-
 class UnitEntry(HedSchemaEntry):
     """ A single unit entry with modifiers in the HedSchema. """
     def __init__(self, *args, **kwargs):
@@ -207,12 +206,13 @@ def finalize_entry(self, schema):
         self.derivative_units = derivative_units
 
     def _get_conversion_factor(self, modifier_entry):
-
-        base_factor = float(self.attributes.get("conversionFactor", "1.0").replace("^", "e"))
-        if modifier_entry:
-            modifier_factor = float(modifier_entry.attributes.get("conversionFactor", "1.0").replace("^", "e"))
-        else:
-            modifier_factor = 1.0
+        base_factor = modifier_factor = 1.0
+        try:
+            base_factor = float(self.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+            if modifier_entry:
+                modifier_factor = float(modifier_entry.attributes.get(HedKey.ConversionFactor, "1.0").replace("^", "e"))
+        except (ValueError, AttributeError):
+            pass  # Malformed conversionFactor; just default to 1.0
         return base_factor * modifier_factor
 
     def get_conversion_factor(self, unit_name):
@@ -224,7 +224,7 @@ def get_conversion_factor(self, unit_name):
         Returns:
             conversion_factor(float or None): Returns the conversion factor or None
         """
-        if "conversionFactor" in self.attributes:
+        if HedKey.ConversionFactor in self.attributes:
             return float(self.derivative_units.get(unit_name))
 
 class HedTagEntry(HedSchemaEntry):
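
The '^'-to-'e' translation above is what lets schema values like "10^100" (see the unit-modifier test updated later in this patch) parse as Python floats. A standalone sketch of the same idea:

    def parse_conversion_factor(raw, default=1.0):
        # Schema files write exponents as 10^2; Python float parsing wants 10e2,
        # so '^' is swapped for 'e' first.  Malformed values fall back to the default.
        try:
            return float(str(raw).replace("^", "e"))
        except ValueError:
            return default

    print(parse_conversion_factor("10^2"))    # 1000.0
    print(parse_conversion_factor("oops"))    # 1.0
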
diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py
index d1d7f5ec..0ccb9c33 100644
--- a/hed/schema/schema_attribute_validators.py
+++ b/hed/schema/schema_attribute_validators.py
@@ -150,4 +150,66 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name):
             issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CHILD_OF_DEPRECATED,
                                                 tag_entry.name,
                                                 child.name)
-    return issues
\ No newline at end of file
+    return issues
+
+
+def conversion_factor(hed_schema, tag_entry, attribute_name):
+    """ Check that the conversionFactor attribute parses as a positive float. """
+    issues = []
+    conversion_factor = tag_entry.attributes.get(attribute_name, "1.0")
+    try:
+        conversion_factor = float(conversion_factor.replace("^", "e"))
+    except (ValueError, AttributeError):
+        pass  # Leave the value unparsed; the check below will report it
+    if not isinstance(conversion_factor, float) or conversion_factor <= 0.0:
+        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE,
+                                            tag_entry.name,
+                                            conversion_factor)
+
+    return issues
+
+
+def allowed_characters_check(hed_schema, tag_entry, attribute_name):
+    """ Check allowed character has a valid value
+
+    Parameters:
+        hed_schema (HedSchema): The schema to use for validation
+        tag_entry (HedSchemaEntry): The schema entry for this attribute.
+        attribute_name (str): The name of this attribute
+
+    Returns:
+        list: A list of issues. Each issue is a dictionary.
+
+    """
+    issues = []
+    allowed_strings = {'letters', 'blank', 'digits', 'alphanumeric'}
+
+    char_string = tag_entry.attributes.get(attribute_name, "")
+    characters = char_string.split(",")
+    for character in characters:
+        if character not in allowed_strings and len(character) != 1:
+            issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ALLOWED_CHARACTERS_INVALID,
+                                                tag_entry.name,
+                                                character)
+    return issues
+
+
+def in_library_check(hed_schema, tag_entry, attribute_name):
+    """ Check allowed character has a valid value
+
+    Parameters:
+        hed_schema (HedSchema): The schema to use for validation
+        tag_entry (HedSchemaEntry): The schema entry for this attribute.
+        attribute_name (str): The name of this attribute
+
+    Returns:
+        list: A list of issues. Each issue is a dictionary.
+
+    """
+    issues = []
+
+    library = tag_entry.attributes.get(attribute_name, "")
+    if hed_schema.library != library:
+        issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_IN_LIBRARY_INVALID,
+                                            tag_entry.name,
+                                            library)
+    return issues
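
All validators now share the (hed_schema, tag_entry, attribute_name) signature and attach their own error codes, which is what lets the registration table in schema_compliance.py (next diff) flatten from (validator, code) pairs to bare function lists. A hedged sketch of adding one more check in the same shape (the rule itself is invented for illustration):

    from hed.errors import ErrorHandler
    from hed.errors.error_types import SchemaAttributeErrors

    def suggested_tag_not_self(hed_schema, tag_entry, attribute_name):
        """ Illustrative rule: a tag should not suggest itself. """
        issues = []
        if tag_entry.attributes.get(attribute_name, "") == tag_entry.name:
            issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID,
                                                tag_entry.name, tag_entry.name, attribute_name)
        return issues

Registering it would then be a one-line append to SchemaValidator.attribute_validators under HedKey.SuggestedTag.
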
diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py
index c75c11de..1a68baf8 100644
--- a/hed/schema/schema_compliance.py
+++ b/hed/schema/schema_compliance.py
@@ -45,27 +45,20 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl
 class SchemaValidator:
     """Validator class to wrap some code.  In general, just call check_compliance."""
     attribute_validators = {
-        HedKey.SuggestedTag: [(schema_attribute_validators.tag_exists_check,
-                              SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID)],
-        HedKey.RelatedTag: [(schema_attribute_validators.tag_exists_check,
-                            SchemaAttributeErrors.SCHEMA_RELATED_TAG_INVALID)],
-        HedKey.UnitClass: [(schema_attribute_validators.tag_is_placeholder_check,
-                            SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
-                           (schema_attribute_validators.unit_class_exists,
-                            SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID)],
-        HedKey.ValueClass: [(schema_attribute_validators.tag_is_placeholder_check,
-                             SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS),
-                            (schema_attribute_validators.value_class_exists,
-                             SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID)],
+        HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check],
+        HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check],
+        HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check,
+                           schema_attribute_validators.unit_class_exists],
+        HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check,
+                            schema_attribute_validators.value_class_exists],
         # Rooted tag is implicitly verified on loading
-        # HedKey.Rooted: [(schema_attribute_validators.tag_exists_base_schema_check,
-        #                 SchemaAttributeErrors.SCHEMA_ROOTED_TAG_INVALID)],
-        HedKey.DeprecatedFrom: [(schema_attribute_validators.tag_is_deprecated_check,
-                                SchemaAttributeErrors.SCHEMA_DEPRECATED_INVALID)],
-        HedKey.TakesValue: [(schema_attribute_validators.tag_is_placeholder_check,
-                             SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS)],
-        HedKey.DefaultUnits: [(schema_attribute_validators.unit_exists,
-                             SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID)]
+        # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check],
+        HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check],
+        HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check],
+        HedKey.DefaultUnits: [schema_attribute_validators.unit_exists],
+        HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor],
+        HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check],
+        HedKey.InLibrary: [schema_attribute_validators.in_library_check]
     }
 
     def __init__(self, hed_schema, check_for_warnings=True, error_handler=None):
@@ -80,7 +73,7 @@ def check_unknown_attributes(self):
         if unknown_attributes:
             for attribute_name, source_tags in unknown_attributes.items():
                 for tag in source_tags:
-                    issues_list += self.error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+                    issues_list += self.error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
                                                                                 attribute_name,
                                                                                 source_tag=tag)
         return issues_list
@@ -93,16 +86,14 @@ def check_attributes(self):
             for tag_entry in self.hed_schema[section_key].values():
                 self.error_handler.push_error_context(ErrorContext.SCHEMA_TAG, tag_entry.name)
                 for attribute_name in tag_entry.attributes:
-                    validators = self.attribute_validators.get(attribute_name, None)
-                    if validators:
-                        for validator, error_code in validators:
-                            self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
-                            new_issues = validator(self.hed_schema, tag_entry, attribute_name)
-                            for issue in new_issues:
-                                issue['code'] = error_code
-                                issue['severity'] = ErrorSeverity.WARNING
-                            self.error_handler.add_context_and_filter(new_issues)
-                            issues_list += new_issues
+                    validators = self.attribute_validators.get(attribute_name, [])
+                    for validator in validators:
+                        self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name)
+                        new_issues = validator(self.hed_schema, tag_entry, attribute_name)
+                        for issue in new_issues:
+                            issue['severity'] = ErrorSeverity.WARNING
+                        self.error_handler.add_context_and_filter(new_issues)
+                        issues_list += new_issues
                         self.error_handler.pop_error_context()
                 self.error_handler.pop_error_context()
             self.error_handler.pop_error_context()
diff --git a/hed/schema/schema_io/schema2base.py b/hed/schema/schema_io/schema2base.py
index e373cf1a..d9d082a1 100644
--- a/hed/schema/schema_io/schema2base.py
+++ b/hed/schema/schema_io/schema2base.py
@@ -106,9 +106,6 @@ def _output_tags(self, tags):
         self._end_tag_section()
 
     def _output_units(self, unit_classes):
-        if not unit_classes:
-            return
-
         section_node = self._start_section(HedSectionKey.UnitClasses)
 
         for unit_class_entry in unit_classes.values():
@@ -128,8 +125,6 @@ def _output_units(self, unit_classes):
                 self._write_entry(unit_entry, unit_class_node)
 
     def _output_section(self, hed_schema, key_class):
-        if not hed_schema[key_class]:
-            return
         parent_node = self._start_section(key_class)
         for entry in hed_schema[key_class].values():
             if self._should_skip(entry):
diff --git a/hed/schema/schema_io/wiki2schema.py b/hed/schema/schema_io/wiki2schema.py
index a02f9ed6..de18f9d6 100644
--- a/hed/schema/schema_io/wiki2schema.py
+++ b/hed/schema/schema_io/wiki2schema.py
@@ -22,12 +22,19 @@
 no_wiki_end_tag = '</nowiki>'
 
 
-ErrorsBySection = {
-    HedWikiSection.Schema: HedExceptions.SCHEMA_START_MISSING,
-    HedWikiSection.EndSchema: HedExceptions.SCHEMA_END_INVALID,
-    HedWikiSection.EndHed: HedExceptions.HED_END_INVALID
-}
-required_sections = [HedWikiSection.Schema, HedWikiSection.EndSchema, HedWikiSection.EndHed]
+
+required_sections = [
+    HedWikiSection.Prologue,
+    HedWikiSection.Schema,
+    HedWikiSection.EndSchema,
+    HedWikiSection.UnitsClasses,
+    HedWikiSection.UnitModifiers,
+    HedWikiSection.ValueClasses,
+    HedWikiSection.Attributes,
+    HedWikiSection.Properties,
+    HedWikiSection.Epilogue,
+    HedWikiSection.EndHed,
+]
 
 
 class SchemaLoaderWiki(SchemaLoader):
@@ -79,15 +86,13 @@ def _parse_data(self):
         # Validate we didn't miss any required sections.
         for section in required_sections:
             if section not in wiki_lines_by_section:
-                error_code = HedExceptions.INVALID_SECTION_SEPARATOR
-                if section in ErrorsBySection:
-                    error_code = ErrorsBySection[section]
+                error_code = HedExceptions.SCHEMA_SECTION_MISSING
                 msg = f"Required section separator '{SectionNames[section]}' not found in file"
                 raise HedFileError(error_code, msg, filename=self.filename)
 
         if self.fatal_errors:
             self.fatal_errors = error_reporter.sort_issues(self.fatal_errors)
-            raise HedFileError(HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+            raise HedFileError(self.fatal_errors[0]['code'],
                                f"{len(self.fatal_errors)} issues found when parsing schema.  See the .issues "
                                f"parameter on this exception for more details.", self.filename,
                                issues=self.fatal_errors)
@@ -109,7 +114,7 @@ def _read_header_section(self, lines):
         for line_number, line in lines:
             if line.strip():
                 msg = f"Extra content [{line}] between HED line and other sections"
-                raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg,  filename=self.filename)
+                raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
 
     def _read_text_block(self, lines):
         text = ""
@@ -163,7 +168,8 @@ def _read_schema(self, lines):
                     parent_tags = parent_tags[:level]
                 elif level > len(parent_tags):
                     self._add_fatal_error(line_number, line,
-                                          "Line has too many *'s at the front.  You cannot skip a level.")
+                                          "Line has too many *'s at the front.  You cannot skip a level."
+                                          , HedExceptions.WIKI_LINE_START_INVALID)
                     continue
             # Create the entry
             tag_entry = self._add_tag_line(parent_tags, line_number, line)
@@ -261,14 +267,37 @@ def _get_header_attributes_internal(self, version_line):
         if "=" not in version_line:
             return self._get_header_attributes_internal_old(version_line)
 
-        final_attributes = {}
+        attributes, malformed = self._parse_attributes_line(version_line)
+
+        for m in malformed:
+            # todo: May shift this at some point to report all errors
+            raise HedFileError(code=HedExceptions.SCHEMA_HEADER_INVALID,
+                               message=f"Header line has a malformed attribute {m}",
+                               filename=self.filename)
+        return attributes
+
+    @staticmethod
+    def _parse_attributes_line(version_line):
+        matches = {}
+        unmatched = []
+        last_end = 0
 
         for match in attr_re.finditer(version_line):
-            attr_name = match.group(1)
-            attr_value = match.group(2)
-            final_attributes[attr_name] = attr_value
+            start, end = match.span()
 
-        return final_attributes
+            # If there's unmatched content between the last match and the current one
+            if start > last_end:
+                unmatched.append(version_line[last_end:start])
+
+            matches[match.group(1)] = match.group(2)
+            last_end = end
+
+        # If there's unmatched content after the last match
+        if last_end < len(version_line):
+            unmatched.append(version_line[last_end:])
+
+        unmatched = [m.strip() for m in unmatched if m.strip()]
+        return matches, unmatched
 
     def _get_header_attributes_internal_old(self, version_line):
         """ Extracts all valid attributes like version from the HED line in .mediawiki format.
@@ -288,7 +317,7 @@ def _get_header_attributes_internal_old(self, version_line):
             divider_index = pair.find(':')
             if divider_index == -1:
                 msg = f"Found poorly matched key:value pair in header: {pair}"
-                raise HedFileError(HedExceptions.HED_SCHEMA_HEADER_INVALID, msg, filename=self.filename)
+                raise HedFileError(HedExceptions.SCHEMA_HEADER_INVALID, msg, filename=self.filename)
             key, value = pair[:divider_index], pair[divider_index + 1:]
             key = key.strip()
             value = value.strip()
@@ -369,10 +398,17 @@ def _get_tag_name(self, tag_line):
         return None, 0
 
     @staticmethod
-    def _get_tag_attributes(tag_line, starting_index):
+    def _validate_attribute_string(attribute_string):
+        pattern = r'^[A-Za-z]+(=.+)?$'
+        match = re.fullmatch(pattern, attribute_string)
+        if match:
+            return match.group()
+
+    def _get_tag_attributes(self, line_number, tag_line, starting_index):
         """ Get the tag attributes from a line.
 
         Parameters:
+            line_number (int): The line number to report errors as
             tag_line (str): A tag line.
             starting_index (int): The first index we can check for the brackets.
 
@@ -386,11 +422,14 @@ def _get_tag_attributes(tag_line, starting_index):
             return None, starting_index
         if attr_string:
             attributes_split = [x.strip() for x in attr_string.split(',')]
-            # Filter out attributes with spaces.
-            attributes_split = [a for a in attributes_split if " " not in a]
 
             final_attributes = {}
             for attribute in attributes_split:
+                if self._validate_attribute_string(attribute) is None:
+                    self._add_fatal_error(line_number, tag_line,
+                                          f"Malformed attribute found {attribute}.  "
+                                          f"Valid formatting is: attribute, or attribute=\"value\".")
+                    continue
                 split_attribute = attribute.split("=")
                 if len(split_attribute) == 1:
                     final_attributes[split_attribute[0]] = True
@@ -468,7 +507,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
         if element_name:
             node_name = element_name
 
-        node_attributes, index = self._get_tag_attributes(tag_line, index)
+        node_attributes, index = self._get_tag_attributes(line_number, tag_line, index)
         if node_attributes is None:
             self._add_fatal_error(line_number, tag_line, "Attributes has mismatched delimiters")
             return
@@ -489,7 +528,7 @@ def _create_entry(self, line_number, tag_line, key_class, element_name=None):
         return tag_entry
 
     def _add_fatal_error(self, line_number, line, warning_message="Schema term is empty or the line is malformed",
-                         error_code=HedExceptions.HED_WIKI_DELIMITERS_INVALID):
+                         error_code=HedExceptions.WIKI_DELIMITERS_INVALID):
         self.fatal_errors.append(
             {'code': error_code,
              ErrorContext.ROW: line_number,
@@ -504,14 +543,12 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
             if line.startswith(section_string):
                 if key in strings_for_section:
                     msg = f"Found section {SectionNames[key]} twice"
-                    raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR,
+                    raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID,
                                        msg, filename=self.filename)
                 if current_section < key:
                     new_section = key
                 else:
-                    error_code = HedExceptions.INVALID_SECTION_SEPARATOR
-                    if key in ErrorsBySection:
-                        error_code = ErrorsBySection[key]
+                    error_code = HedExceptions.SCHEMA_SECTION_MISSING
                     msg = f"Found section {SectionNames[key]} out of order in file"
                     raise HedFileError(error_code, msg, filename=self.filename)
                 break
@@ -520,11 +557,11 @@ def _check_for_new_section(self, line, strings_for_section, current_section):
     def _handle_bad_section_sep(self, line, current_section):
         if current_section != HedWikiSection.Schema and line.startswith(wiki_constants.ROOT_TAG):
             msg = f"Invalid section separator '{line.strip()}'"
-            raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+            raise HedFileError(HedExceptions.SCHEMA_SECTION_MISSING, msg, filename=self.filename)
 
         if line.startswith("!#"):
             msg = f"Invalid section separator '{line.strip()}'"
-            raise HedFileError(HedExceptions.INVALID_SECTION_SEPARATOR, msg, filename=self.filename)
+            raise HedFileError(HedExceptions.WIKI_SEPARATOR_INVALID, msg, filename=self.filename)
 
     def _split_lines_into_sections(self, wiki_lines):
         """ Takes a list of lines, and splits it into valid wiki sections.
diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py
index 8404970e..25b27ab8 100644
--- a/hed/schema/schema_validation_util.py
+++ b/hed/schema/schema_validation_util.py
@@ -4,6 +4,7 @@
 from hed.errors import ErrorHandler, SchemaWarnings
 from hed.schema import hed_schema_constants as constants
 from hed.errors.exceptions import HedExceptions, HedFileError
+from hed.schema.hed_schema_constants import valid_header_attributes
 
 ALLOWED_TAG_CHARS = "-"
 ALLOWED_DESC_CHARS = "-_:;,./()+ ^"
@@ -45,9 +46,9 @@ def validate_version_string(version_string):
 
 
 header_attribute_validators = {
-        constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID),
-        constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
-    }
+    constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.SCHEMA_VERSION_INVALID),
+    constants.LIBRARY_ATTRIBUTE: (validate_library_name, HedExceptions.BAD_HED_LIBRARY_NAME)
+}
 
 
 def validate_present_attributes(attrib_dict, filename):
@@ -92,9 +93,12 @@ def validate_attributes(attrib_dict, filename):
             had_error = validator(attribute_value)
             if had_error:
                 raise HedFileError(error_code, had_error, filename)
+        if attribute_name not in valid_header_attributes:
+            raise HedFileError(HedExceptions.SCHEMA_UNKNOWN_HEADER_ATTRIBUTE,
+                               f"Unknown attribute {attribute_name} found in header line", filename=filename)
 
     if constants.VERSION_ATTRIBUTE not in attrib_dict:
-        raise HedFileError(HedExceptions.HED_SCHEMA_VERSION_INVALID,
+        raise HedFileError(HedExceptions.SCHEMA_VERSION_INVALID,
                            "No version attribute found in header", filename=filename)
 
 
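
Putting the two checks together, header validation now runs the per-attribute validator when one is registered, then rejects any attribute name outside valid_header_attributes. A standalone sketch of that loop (a plain ValueError stands in for HedFileError, and the validator table shape follows the dict above):

    def validate_header_sketch(attrib_dict, validators, valid_names):
        # validators: name -> (validator_func, error_code); validator_func
        # returns an error message on failure and something falsy on success.
        for name, value in attrib_dict.items():
            entry = validators.get(name)
            if entry:
                validator, error_code = entry
                message = validator(value)
                if message:
                    raise ValueError(f"{error_code}: {message}")
            if name not in valid_names:
                raise ValueError(f"Unknown attribute {name} found in header line")

    validate_header_sketch({"version": "8.2.0"},
                           {"version": (lambda v: "" if v else "empty version",
                                        "SCHEMA_VERSION_INVALID")},
                           {"version", "library"})
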
diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
index c47fff94..c1aad366 160000
--- a/spec_tests/hed-specification
+++ b/spec_tests/hed-specification
@@ -1 +1 @@
-Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68
+Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8
diff --git a/spec_tests/test_errors.py b/spec_tests/test_errors.py
index 972d53d4..3e87fdbd 100644
--- a/spec_tests/test_errors.py
+++ b/spec_tests/test_errors.py
@@ -12,55 +12,11 @@
 from hed.errors import ErrorHandler, get_printable_issue_string
 
 
-# To be removed eventually once all errors are being verified.
-known_errors = [
-    'SIDECAR_INVALID',
-    'CHARACTER_INVALID',
-    'COMMA_MISSING',
-    "DEF_EXPAND_INVALID",
-    "DEF_INVALID",
-    "DEFINITION_INVALID",
-    "NODE_NAME_EMPTY",
-    "ONSET_OFFSET_INSET_ERROR",
-    "PARENTHESES_MISMATCH",
-    "PLACEHOLDER_INVALID",
-    "REQUIRED_TAG_MISSING",
-    "SIDECAR_INVALID",
-    "SIDECAR_KEY_MISSING",
-    "STYLE_WARNING",
-    "TAG_EMPTY",
-    "TAG_EXPRESSION_REPEATED",
-    "TAG_EXTENDED",
-    "TAG_EXTENSION_INVALID",
-    "TAG_GROUP_ERROR",
-    "TAG_INVALID",
-    "TAG_NOT_UNIQUE",
-    "TAG_NAMESPACE_PREFIX_INVALID",
-    "TAG_REQUIRES_CHILD",
-    "TILDES_UNSUPPORTED",
-    "UNITS_INVALID",
-    "UNITS_MISSING",
-    "VALUE_INVALID",
-
-    "SIDECAR_BRACES_INVALID",
-    "SCHEMA_LIBRARY_INVALID",
-
-    "SCHEMA_ATTRIBUTE_INVALID",
-    "SCHEMA_UNIT_CLASS_INVALID",
-    "SCHEMA_VALUE_CLASS_INVALID",
-    "SCHEMA_DEPRECATED_INVALID",
-    "SCHEMA_SUGGESTED_TAG_INVALID",
-    "SCHEMA_RELATED_TAG_INVALID",
-    "SCHEMA_NON_PLACEHOLDER_HAS_CLASS",
-    "SCHEMA_DEFAULT_UNITS_INVALID"
-]
-
 skip_tests = {
     "VERSION_DEPRECATED": "Not applicable",
     "tag-extension-invalid-bad-node-name": "Part of character invalid checking/didn't get to it yet",
 }
 
-
 class MyTestCase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
@@ -80,9 +36,7 @@ def run_single_test(self, test_file):
             test_info = json.load(fp)
         for info in test_info:
             error_code = info['error_code']
-            verify_code = False
-            if error_code in known_errors:
-                verify_code = True
+            verify_code = True
             # To be deprecated once we add this to all tests
             self._verify_code = verify_code
             if error_code in skip_tests:
@@ -93,6 +47,8 @@ def run_single_test(self, test_file):
                 print(f"Skipping {name} test because: {skip_tests[name]}")
                 continue
 
             description = info['description']
             schema = info['schema']
             check_for_warnings = info.get("warning", False)
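
With the known_errors whitelist gone, the harness verifies every case's error_code unless it is explicitly listed in skip_tests. A trivial sketch of that gating:

    skip_tests = {"VERSION_DEPRECATED": "Not applicable"}
    test_info = [{"error_code": "VERSION_DEPRECATED"}, {"error_code": "TAG_INVALID"}]

    for info in test_info:
        code = info["error_code"]
        if code in skip_tests:
            print(f"Skipping {code} because: {skip_tests[code]}")
            continue
        print(f"Would run and verify {code}")   # stand-in for the real test body
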
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
index a596775c..e2246335 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_badroot_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
index 672792aa..2b76a3a4 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_dupesubroot_0.0.1.mediawiki
@@ -17,6 +17,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
index d5e6cf44..678a6249 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid1.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
index 979f72bd..037c9bc7 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid2.mediawiki
@@ -13,6 +13,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
index 3438be07..f79d8361 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_invalid3.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
index 267a214e..80454ef4 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/HED_root_wrong_place_0.0.1.mediawiki
@@ -11,6 +11,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
index ee20104a..d3368e37 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags1.mediawiki
@@ -33,6 +33,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 
 '''Unit classes'''
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
 
 
 '''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
index 8b3a3a86..64144708 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags2.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 
 '''Unit classes'''
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 
 
 '''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
index 7939dfd9..f8bccd4d 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags3.mediawiki
@@ -32,6 +32,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 
 '''Unit classes'''
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 
 
 '''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
index 4a084ebd..eb283125 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_tags4.mediawiki
@@ -33,6 +33,14 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 
 '''Unit classes'''
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 
 
 '''Epilogue'''
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
index f282aabb..289265f8 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_unit_classes.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 * weightUnits <nowiki>{defaultUnits=testUnit}</nowiki>
 ** testUnit <nowiki>{conversionFactor=100}</nowiki>
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
 
 '''Epilogue'''
 The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
index b7c4d5aa..ac67b8fe 100644
--- a/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
+++ b/tests/data/schema_tests/merge_tests/issues_tests/overlapping_units.mediawiki
@@ -34,6 +34,13 @@ For more information see https://hed-schema-library.readthedocs.io/en/latest/ind
 * weightUnitsNew <nowiki>{defaultUnits=g}</nowiki>
 ** g <nowiki>{conversionFactor=100}</nowiki>
 
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
 
 '''Epilogue'''
 The Standardized Computer-based Organized Reporting of EEG (SCORE) is a standard terminology for scalp EEG data assessment designed for use in clinical practice that may also be used for research purposes.
diff --git a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
index d5e31f3b..6536476c 100644
--- a/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
+++ b/tests/data/schema_tests/merge_tests/sorted_root.mediawiki
@@ -44,6 +44,16 @@ This schema is the first official release that includes an xsd and requires unit
 
 !# end schema
 
+'''Unit classes'''
+
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
 '''Epilogue'''
 
 !# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
index 049260f1..4327c6a4 100644
--- a/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
+++ b/tests/data/schema_tests/wiki_tests/HED_default.mediawiki
@@ -1,6 +1,6 @@
 HED version:8.0.0-alpha.1
 
-
+'''Prologue'''
 This is a prologue line.
 This is a second prologue line.
 
@@ -1098,7 +1098,15 @@ This is a second prologue line.
 * z <nowiki>{SIUnitSymbolModifier} [SI unit submultiple representing 10^-21]</nowiki>
 * yocto <nowiki>{SIUnitModifier} [SI unit submultiple representing 10^-24]</nowiki>
 * y <nowiki>{SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]</nowiki>
-!# end hed
 
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+
+'''Epilogue'''
 This is an epilogue.
-This is a second line of an epilogue.
\ No newline at end of file
+This is a second line of an epilogue.
+
+!# end hed
\ No newline at end of file
diff --git a/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
new file mode 100644
index 00000000..d2c398e3
--- /dev/null
+++ b/tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki
@@ -0,0 +1,41 @@
+HED version="8.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://github.com/hed-standard/hed-specification/raw/master/hedxml/HED8.0.0.xsd"
+
+'''Prologue'''
+This schema tests Appendix B SCHEMA_ATTRIBUTE_INVALID
+
+!# start schema
+
+'''Tag1''' <nowiki>{suggestedTag=Tag1}[suggested tag is not registered in the schema]</nowiki>
+* Tag2 <nowiki>{valueClassAttribute}[value attribute is the wrong tag class]</nowiki>
+* Tag3 <nowiki>{unitAttribute}[unit attribute is the wrong tag class]</nowiki>
+
+!# end schema
+'''Unit classes''' <nowiki></nowiki>
+* unitClass1 <nowiki>{unitAttribute}[Wrong attribute type]</nowiki>
+** unit1 <nowiki>{tagAttribute}[Wrong attribute type]</nowiki>
+
+'''Unit modifiers''' <nowiki></nowiki>
+* mod1 <nowiki>{tagAttribute}[Wrong attribute type]</nowiki>
+
+'''Value classes''' <nowiki></nowiki>
+* valueClass1 <nowiki>{tagAttribute}[Wrong attribute type]</nowiki>
+
+'''Schema attributes''' <nowiki></nowiki>
+* tagAttribute <nowiki></nowiki>
+* unitAttribute <nowiki>{unitProperty}</nowiki>
+* unitClassAttribute <nowiki>{unitClassProperty}</nowiki>
+* unitModifierAttribute <nowiki>{unitModifierProperty}</nowiki>
+* valueClassAttribute <nowiki>{valueClassProperty}</nowiki>
+* attribute1 <nowiki>{valueClassProperty}</nowiki>
+
+'''Properties''' <nowiki></nowiki>
+* boolProperty <nowiki></nowiki>
+* unitClassProperty <nowiki></nowiki>
+* unitModifierProperty <nowiki></nowiki>
+* unitProperty <nowiki></nowiki>
+* valueClassProperty <nowiki></nowiki>
+
+'''Epilogue'''
+This is an updated version of the schema format. The properties are now part of the schema. The schema attributes are designed to be checked in software rather than hard-coded. The schema attributes themselves have properties.
+
+!# end hed
\ No newline at end of file
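
This fixture is exercised by the new test_attribute_invalid test in test_schema_wiki_fatal_errors.py at the end of this patch. A sketch of checking it by hand (path relative to the repository root):

    from hed import load_schema
    from hed.schema.schema_compliance import check_compliance

    schema = load_schema("tests/data/schema_tests/wiki_tests/attribute_unknown1.mediawiki")
    issues = check_compliance(schema)
    print(sorted({i["code"] for i in issues}))   # expect SCHEMA_ATTRIBUTE_INVALID among them
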
diff --git a/tests/data/validator_tests/bids_schema.mediawiki b/tests/data/validator_tests/bids_schema.mediawiki
index 971a9723..b306003b 100644
--- a/tests/data/validator_tests/bids_schema.mediawiki
+++ b/tests/data/validator_tests/bids_schema.mediawiki
@@ -1,5 +1,7 @@
 HED version: 8.0.0-alpha.2
 
+'''Prologue'''
+
 !# start schema 
 
 '''Event''' 
@@ -1163,6 +1165,7 @@ HED version: 8.0.0-alpha.2
 * yocto <nowiki>{SIUnitModifier} [SI unit submultiple representing 10^-24]</nowiki>
 * y <nowiki>{SIUnitSymbolModifier} [SI unit submultiple representing 10^-24]</nowiki>
 
+'''Value classes'''
 
 '''Schema attributes'''
 * allowedCharacter <nowiki>{unitClassProperty}[An attribute of unit classes schema value placeholders indicating a special character that is allowed in expressing the value of that placeholder.]</nowiki>
@@ -1184,6 +1187,8 @@ HED version: 8.0.0-alpha.2
 * unitSymbol <nowiki>{boolProperty, unitProperty}[Abbreviation or symbol representing a type of unit. Unit symbols  represent both the singular and the plural and thus cannot be pluralized.]</nowiki>
 * unitClass <nowiki>[Specifies the type of a unit for a tag.]</nowiki>
 
+'''Properties'''
+
 '''Epilogue'''
 This is the new format for the mediawiki schema
 
diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py
index c21d839a..75f66d17 100644
--- a/tests/schema/test_hed_schema_io.py
+++ b/tests/schema/test_hed_schema_io.py
@@ -245,10 +245,10 @@ def _base_added_class_tests(self, schema):
 
         unit_class_entry = schema.unit_classes["weightUnits"]
         unit_entry = unit_class_entry.units["testUnit"]
-        self.assertEqual(unit_entry.attributes["conversionFactor"], str(100))
+        self.assertEqual(unit_entry.attributes[HedKey.ConversionFactor], str(100))
 
         unit_modifier_entry = schema.unit_modifiers["huge"]
-        self.assertEqual(unit_modifier_entry.attributes["conversionFactor"], "10^100")
+        self.assertEqual(unit_modifier_entry.attributes[HedKey.ConversionFactor], "10^100")
         self.assertTrue(unit_modifier_entry.attributes["customElementAttribute"])
 
         value_class_entry = schema.value_classes["customValueClass"]
@@ -328,9 +328,9 @@ def test_cannot_load_schemas(self):
 
         ]
         for file in files:
-            with self.assertRaises(HedFileError):
-                # print(file)
+            with self.assertRaises(HedFileError) as context:
                 load_schema(file)
+            self.assertEqual(context.exception.code, HedExceptions.SCHEMA_LIBRARY_INVALID)
 
     def test_saving_in_library_wiki(self):
         old_score_schema = load_schema_version("score_1.0.0")
diff --git a/tests/schema/test_schema_wiki_fatal_errors.py b/tests/schema/test_schema_wiki_fatal_errors.py
index 583579b1..0759dba4 100644
--- a/tests/schema/test_schema_wiki_fatal_errors.py
+++ b/tests/schema/test_schema_wiki_fatal_errors.py
@@ -1,7 +1,7 @@
 import unittest
 import os
 
-from hed import schema
+from hed import load_schema
 from hed.errors import HedFileError, HedExceptions
 
 
@@ -12,25 +12,25 @@ class TestHedSchema(unittest.TestCase):
     def setUpClass(cls):
         cls.full_base_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), cls.base_schema_dir)
         cls.files_and_errors = {
-            "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_START_MISSING,
-            "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_END_INVALID,
-            "HED_hed_no_end.mediawiki": HedExceptions.HED_END_INVALID,
-            "HED_separator_invalid.mediawiki": HedExceptions.INVALID_SECTION_SEPARATOR,
+            "HED_schema_no_start.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+            "HED_schema_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+            "HED_hed_no_end.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+            "HED_separator_invalid.mediawiki": HedExceptions.WIKI_SEPARATOR_INVALID,
             "HED_header_missing.mediawiki": HedExceptions.SCHEMA_HEADER_MISSING,
-            "HED_header_invalid.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
-            "empty_file.mediawiki": HedExceptions.HED_SCHEMA_HEADER_INVALID,
-            "HED_header_invalid_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
-            "HED_header_missing_version.mediawiki": HedExceptions.HED_SCHEMA_VERSION_INVALID,
+            "HED_header_invalid.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+            "empty_file.mediawiki": HedExceptions.SCHEMA_HEADER_INVALID,
+            "HED_header_invalid_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
+            "HED_header_missing_version.mediawiki": HedExceptions.SCHEMA_VERSION_INVALID,
             "HED_header_bad_library.mediawiki": HedExceptions.BAD_HED_LIBRARY_NAME,
-            "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_START_MISSING,
-            "empty_node.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line2.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line3.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line4.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line5.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line6.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
-            "malformed_line7.mediawiki": HedExceptions.HED_WIKI_DELIMITERS_INVALID,
+            "HED_schema_out_of_order.mediawiki": HedExceptions.SCHEMA_SECTION_MISSING,
+            "empty_node.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line2.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line3.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line4.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line5.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line6.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
+            "malformed_line7.mediawiki": HedExceptions.WIKI_DELIMITERS_INVALID,
             "empty_node.xml": HedExceptions.HED_SCHEMA_NODE_NAME_INVALID
         }
 
@@ -60,9 +60,10 @@ def test_invalid_schema(self):
         for filename, error in self.files_and_errors.items():
             full_filename = self.full_base_folder + filename
             with self.assertRaises(HedFileError) as context:
-                schema.load_schema(full_filename)
+                load_schema(full_filename)
                 # all of these should produce exceptions.
-            from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+            from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+
             # Verify basic properties of exception
             expected_line_numbers = self.expected_line_numbers.get(filename, [])
             if expected_line_numbers:
@@ -82,9 +83,10 @@ def test_merging_errors_schema(self):
         for filename, error in self.files_and_errors.items():
             full_filename = self.full_base_folder + filename
             with self.assertRaises(HedFileError) as context:
-                schema.load_schema(full_filename)
+                load_schema(full_filename)
                 # all of these should produce exceptions.
-            from hed.errors import ErrorHandler, ErrorContext, SchemaErrors, get_printable_issue_string
+            from hed.errors import ErrorHandler, ErrorContext, get_printable_issue_string
+            from hed.errors.error_types import SchemaAttributeErrors
             # Verify basic properties of exception
             expected_line_numbers = self.expected_line_numbers.get(filename, [])
             if expected_line_numbers:
@@ -96,7 +98,7 @@ def test_merging_errors_schema(self):
             error_handler.push_error_context(ErrorContext.ROW, 1)
             error_handler.push_error_context(ErrorContext.COLUMN, 2)
 
-            issues = error_handler.format_error_with_context(SchemaErrors.SCHEMA_ATTRIBUTE_INVALID,
+            issues = error_handler.format_error_with_context(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_INVALID,
                                                              "error_attribute", source_tag="error_tag")
             error_handler.pop_error_context()
             error_handler.pop_error_context()
@@ -106,3 +108,9 @@ def test_merging_errors_schema(self):
 
             self.assertTrue(context.exception.args[0] == error)
             self.assertTrue(context.exception.filename == full_filename)
+
+    def test_attribute_invalid(self):
+        path = os.path.join(self.full_base_folder, "attribute_unknown1.mediawiki")
+        schema = load_schema(path)
+        issues = schema.check_compliance()
+        self.assertEqual(len(issues), 7)
\ No newline at end of file
diff --git a/tests/schema/util_create_schemas.py b/tests/schema/util_create_schemas.py
index 850d014e..415b94dc 100644
--- a/tests/schema/util_create_schemas.py
+++ b/tests/schema/util_create_schemas.py
@@ -10,13 +10,30 @@
 """
 
 library_schema_end = """
-!# end schema
+
 
 !# end hed
     """
 
-def _get_test_schema(node_lines):
-    library_schema_string = library_schema_start + "\n".join(node_lines) + library_schema_end
+default_end_lines = """
+!# end schema
+"""
+
+required_non_tag = [
+    "'''Unit classes'''",
+    "'''Unit modifiers'''",
+    "'''Value classes'''",
+    "'''Schema attributes'''",
+    "'''Properties'''",
+    "'''Epilogue'''"
+]
+def _get_test_schema(node_lines, other_lines=(default_end_lines,)):
+    node_section = "\n".join(node_lines)
+    non_tag_section = "\n".join(other_lines)
+    for name in required_non_tag:
+        if name not in other_lines:
+            non_tag_section += f"\n{name}\n"
+    library_schema_string = library_schema_start + node_section + non_tag_section + library_schema_end
     test_schema = from_string(library_schema_string, ".mediawiki")
 
     return test_schema

From c69b7f6cb0ac9cf2d48237669894bb6cb43b0f24 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 29 Sep 2023 11:22:39 +0000
Subject: [PATCH 03/21] Bump spec_tests/hed-specification from `c47fff9` to
 `c1aad36`

Bumps [spec_tests/hed-specification](https://github.com/hed-standard/hed-specification) from `c47fff9` to `c1aad36`.
- [Release notes](https://github.com/hed-standard/hed-specification/releases)
- [Commits](https://github.com/hed-standard/hed-specification/compare/c47fff949db70c9105c875bbdfdf0d11389ffd68...c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8)

---
updated-dependencies:
- dependency-name: spec_tests/hed-specification
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 spec_tests/hed-specification | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
index c47fff94..c1aad366 160000
--- a/spec_tests/hed-specification
+++ b/spec_tests/hed-specification
@@ -1 +1 @@
-Subproject commit c47fff949db70c9105c875bbdfdf0d11389ffd68
+Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8

From dfbbd64c1ac98ce7b636dfe34f70b7511c22ad9d Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 3 Oct 2023 11:57:26 +0000
Subject: [PATCH 04/21] Bump spec_tests/hed-specification from `c1aad36` to
 `7f3868b`

Bumps [spec_tests/hed-specification](https://github.com/hed-standard/hed-specification) from `c1aad36` to `7f3868b`.
- [Release notes](https://github.com/hed-standard/hed-specification/releases)
- [Commits](https://github.com/hed-standard/hed-specification/compare/c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8...7f3868bdff542db5660e2bd1a3d0d565e3793502)

---
updated-dependencies:
- dependency-name: spec_tests/hed-specification
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 spec_tests/hed-specification | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spec_tests/hed-specification b/spec_tests/hed-specification
index c1aad366..7f3868bd 160000
--- a/spec_tests/hed-specification
+++ b/spec_tests/hed-specification
@@ -1 +1 @@
-Subproject commit c1aad366fee6c7f1e68fbd73d2ce6dc369444ad8
+Subproject commit 7f3868bdff542db5660e2bd1a3d0d565e3793502

From 0014ef4fb415999a69f1f7aa3c7e5d0d49b07456 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 5 Oct 2023 16:27:07 -0500
Subject: [PATCH 05/21] Update basic_search to support words that must not
 appear

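A minimal usage sketch (hypothetical series values; the expected mask follows
the semantics exercised by the new tests below):

    import pandas as pd
    from hed.models.basic_search import find_matching

    series = pd.Series(["word0, word1", "word0, word2"])
    # '~word1' requires word1 to be absent from the line entirely.
    mask = find_matching(series, "word0, ~word1")
    # Expected mask: [False, True]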
---
 hed/models/basic_search.py        |  88 ++++++-----
 tests/models/test_basic_search.py | 243 +++++++++++++++++-------------
 2 files changed, 189 insertions(+), 142 deletions(-)

diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py
index ae47b71e..9301a0cc 100644
--- a/hed/models/basic_search.py
+++ b/hed/models/basic_search.py
@@ -8,14 +8,18 @@ def find_matching(series, search_string, regex=False):
     """ Finds lines in the series that match the search string and returns a mask.
 
     Syntax Rules:
-        - '@': Prefixing a term in the search string means the object must appear anywhere within a line.
+        - '@': Prefixing a term in the search string means the term must appear anywhere within a line.
+        - '~': Prefixing a term in the search string means the term must NOT appear within a line.
         - Parentheses: Elements within parentheses must appear in the line with the same level of nesting.
-                eg: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't
-                    start in the same group.
+                e.g.: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't
+                      start in the same group.
         - "LongFormTag*": A * will match any remaining word(anything but a comma or parenthesis)
         - An individual term can be arbitrary regex, but it is limited to single continuous words.
 
     Notes:
+        - Specific words only care about their level relative to other specific words, not overall.
+              e.g. "(A, B)" will find: "A, B", "(A, B)", (A, (C), B)", or ((A, B))"
+        - If you have no grouping or anywhere words in the search, it assumes all terms are anywhere words.
         - The format of the series should match the format of the search string, whether it's in short or long form.
         - To enable support for matching parent tags, ensure that both the series and search string are in long form.
 
@@ -33,19 +37,19 @@ def find_matching(series, search_string, regex=False):
     if not regex:
         # Replace *'s with a reasonable value for people who don't know regex
         search_string = re.sub(r'(?<!\.)\*', '.*?', search_string)
-    anywhere_words, specific_words = find_words(search_string)
+    anywhere_words, negative_words, specific_words = find_words(search_string)
+    # If we have no nesting or anywhere words, assume they don't care about level
+    if "(" not in search_string and "@" not in search_string:
+        anywhere_words += specific_words
+        specific_words = []
     delimiter_map = construct_delimiter_map(search_string, specific_words)
-    source_words = anywhere_words + specific_words
+    candidate_indexes = _verify_basic_words(series, anywhere_words, negative_words)
 
-    # Create a set of series of masks to determine which rows contain each individual word
-    candidate_indexes = set(series.index)
-
-    # Loop through source_words to filter down candidate_indexes
-    for word in source_words:
+    # Do a basic check for all specific words (this doesn't verify word delimiters)
+    for word in specific_words:
         matches = series.str.contains(word, regex=True)
         current_word_indexes = set(matches[matches].index.tolist())
 
-        # Update candidate_indexes by taking the intersection with current_word_indexes
         candidate_indexes &= current_word_indexes
 
         if not candidate_indexes:
@@ -53,40 +57,63 @@ def find_matching(series, search_string, regex=False):
 
     candidate_indexes = sorted(candidate_indexes)
 
-    full_mask = pd.Series(False, index=series.index)
-
-    candidate_series = series[candidate_indexes]
+    full_mask = pd.Series(False, index=series.index, dtype=bool)
 
-    mask = candidate_series.apply(verify_search_delimiters, args=(anywhere_words, specific_words, delimiter_map))
-    full_mask.loc[candidate_indexes] = mask
+    if candidate_indexes:
+        if specific_words:
+            candidate_series = series[candidate_indexes]
+            mask = candidate_series.apply(verify_search_delimiters, args=(specific_words, delimiter_map))
+            full_mask.loc[candidate_indexes] = mask
+        else:
+            full_mask.loc[candidate_indexes] = True
 
     return full_mask
 
 
+def _get_word_indexes(series, word):
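+    # Match the word only when bounded by start/end of line, a space, a comma,
+    # or a parenthesis, so substrings of longer tags do not count as matches.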
+    pattern = r'(?:[ ,()]|^)' + word + r'(?:[ ,()]|$)'
+    matches = series.str.contains(pattern, regex=True)
+    return set(matches[matches].index.tolist())
+
+
+def _verify_basic_words(series, anywhere_words, negative_words):
+    candidate_indexes = set(series.index)
+    for word in anywhere_words:
+        current_word_indexes = _get_word_indexes(series, word)
+        candidate_indexes &= current_word_indexes
+
+    for word in negative_words:
+        current_word_indexes = _get_word_indexes(series, word)
+        candidate_indexes -= current_word_indexes
+    return candidate_indexes
+
+
 def find_words(search_string):
-    """ Extract all words in the search string.  Dividing them into words that must be relative to each other,
-         and words that can be anywhere.
+    """
+    Extract words in the search string based on their prefixes.
 
     Args:
         search_string (str): The search query string to parse.
-                             Words prefixed with '@' are 'anywhere' words.
+                             Words can be prefixed with '@' or '~'.
 
     Returns:
-        tuple: A tuple containing two lists:
-            - anywhere_words (list of str): Words that can appear anywhere in the text.
-            - specific_words (list of str): Words that must appear relative to other terms.
+        list: A list containing three lists:
+            - Words prefixed with '@'
+            - Words prefixed with '~'
+            - Words with no prefix
     """
-    # Match sequences of characters that are not commas, parentheses, or standalone spaces.
+    # Match sequences of characters that are not commas or parentheses.
     pattern = r'[^,()]+'
     words = re.findall(pattern, search_string)
 
     # Remove any extraneous whitespace from each word
     words = [word.strip() for word in words if word.strip()]
 
-    anywhere_words = [word[1:] for word in words if word.startswith("@")]
-    specific_words = [word for word in words if not word.startswith("@")]
+    at_words = [word[1:] for word in words if word.startswith("@")]
+    tilde_words = [word[1:] for word in words if word.startswith("~")]
+    no_prefix_words = [word for word in words if not word.startswith("~") and not word.startswith("@")]
 
-    return anywhere_words, specific_words
+    return [at_words, tilde_words, no_prefix_words]
 
 
 def check_parentheses(text):
@@ -180,12 +207,11 @@ def construct_delimiter_map(text, words):
     return delimiter_map
 
 
-def verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map):
+def verify_search_delimiters(text, specific_words, delimiter_map):
     """ Verifies if the text contains specific words with expected delimiters between them.
 
     Args:
         text (str): The text to search in.
-        anywhere_words (list of str): Words that can appear anywhere in the text.
         specific_words (list of str): Words that must appear relative to other words in the text
         delimiter_map (dict): A dictionary specifying expected delimiters between pairs of specific words.
 
@@ -194,12 +220,6 @@ def verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map
     """
     locations = defaultdict(list)
 
-    # Check for anywhere words
-    for word in anywhere_words:
-        pattern = r'(?:[ ,()]|^)(' + word + r')(?:[ ,()]|$)'
-        if not any(re.finditer(pattern, text)):
-            return False
-
     # Find all locations for each word in the text
     for word in specific_words:
         for match in re.finditer(r'(?:[ ,()]|^)(' + word + r')(?:[ ,()]|$)', text):
diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py
index 0a942b93..36fcc168 100644
--- a/tests/models/test_basic_search.py
+++ b/tests/models/test_basic_search.py
@@ -35,37 +35,37 @@ def test_find_matching_results(self):
 class TestFindWords(unittest.TestCase):
     def test_basic(self):
         search_string = "@global (local1, local2)"
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, ['global'])
         self.assertEqual(specific_words, ['local1', 'local2'])
 
     def test_no_anywhere_words(self):
         search_string = "(local1, local2)"
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, [])
         self.assertEqual(specific_words, ['local1', 'local2'])
 
     def test_no_specific_words(self):
         search_string = "@global1, @global2"
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, ['global1', 'global2'])
         self.assertEqual(specific_words, [])
 
     def test_empty_string(self):
         search_string = ""
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, [])
         self.assertEqual(specific_words, [])
 
     def test_mixed_words(self):
         search_string = "@global (local1, local2), @another_global"
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, ['global', 'another_global'])
         self.assertEqual(specific_words, ['local1', 'local2'])
 
     def test_whitespace(self):
         search_string = " @Global ,  ( local1 , local2 ) "
-        anywhere_words, specific_words = find_words(search_string)
+        anywhere_words, _, specific_words = find_words(search_string)
         self.assertEqual(anywhere_words, ['Global'])
         self.assertEqual(specific_words, ['local1', 'local2'])
 
@@ -138,118 +138,30 @@ def test_multiple_words(self):
 
 
 class TestVerifyDelimiters(unittest.TestCase):
-    def base_verify_func(self, query_text, text, anywhere_words, specific_words, expected_result):
+    def base_verify_func(self, query_text, text, specific_words, expected_result):
         delimiter_map = construct_delimiter_map(query_text, specific_words)
-        actual_result = verify_search_delimiters(text, anywhere_words, specific_words, delimiter_map)
+        actual_result = verify_search_delimiters(text, specific_words, delimiter_map)
         self.assertEqual(actual_result, expected_result)
 
     def test_all_conditions_met(self):
         query_text = "word0,((word1),word2)"
         specific_words = ["word0", "word1", "word2"]
         text = "word0,((word1),word2)"
-        self.base_verify_func(query_text, text, [], specific_words, True)
+        self.base_verify_func(query_text, text, specific_words, True)
         text = "((word1),word2), word0"
-        self.base_verify_func(query_text, text, [], specific_words, True)
+        self.base_verify_func(query_text, text, specific_words, True)
         text = "word0,(word2, (word1))"
-        self.base_verify_func(query_text, text, [], specific_words, True)
+        self.base_verify_func(query_text, text, specific_words, True)
         text = "word0,((word1),(ExtraGroup),word2)"
-        self.base_verify_func(query_text, text, [], specific_words, True)
+        self.base_verify_func(query_text, text, specific_words, True)
         text = "word0,((word2),word1)"
-        self.base_verify_func(query_text, text, [], specific_words, False)
+        self.base_verify_func(query_text, text, specific_words, False)
         text = "((word1),word0), word2"
-        self.base_verify_func(query_text, text, [], specific_words, False)
+        self.base_verify_func(query_text, text, specific_words, False)
         text = "word0,((word1))"
-        self.base_verify_func(query_text, text, [], specific_words, False)
+        self.base_verify_func(query_text, text, specific_words, False)
         text = "(word1),(ExtraGroup),word2)"
-        self.base_verify_func(query_text, text, [], specific_words, False)
-
-    def test_complex_case_with_word_identifiers(self):
-        query_text = "word0,((word1),@word2,@word3,word4)"
-        specific_words = ["word0", "word1", "word4"]
-        anywhere_words = ["word2", "word3"]
-        text = "word0,((word1),word2,word3,word4)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-        text = "word2,word0,((word1),word3,word4)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-        text = "word3,((word1),word2,word4),word0"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-        text = "word0,((word1),word4),word2,word3"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-        text = "word0,word1,word4,word2"  # Incorrect delimiters
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-        text = "word2,word3"  # Missing specific words
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-    def test_very_complex_case_with_word_identifiers(self):
-        query_text = "word0,(((word1,word2),@word3)),((word4,word5)))"
-        specific_words = ["word0", "word1", "word2", "word4", "word5"]
-        anywhere_words = ["word3"]
-
-        # Test case where all conditions are met
-        text = "word0,(((word1,word2),word3)),((word4,word5)))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Test case with anywhere words out of specific context but still in the string
-        text = "word3,word0,(((word1,word2))),((word4,word5)))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Test case with correct specific words but incorrect delimiters
-        text = "word0,((word1,word2),word3),(word4,word5)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-        # Test case missing one specific word
-        text = "word0,(((word1,word2),word3)),(word4))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-        # Test case missing anywhere word
-        text = "word0,(((word1,word2))),((word4,word5)))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-    def test_incorrect_single_delimiter(self):
-        query_text = "word0,((word1)),word2"
-        specific_words = ["word0", "word1", "word2"]
-        anywhere_words = []
-
-        # Positive case 1: Exact match
-        text = "word0,((word1)),word2"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Positive case 2: Additional parentheses around the entire sequence
-        text = "(word0,((word1)),word2)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Single closing parenthesis missing between word1 and word2
-        text = "word0,((word1),word2)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-        # Single opening parenthesis missing between word0 and word1
-        text = "word0,(word1)),word2"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-    def test_mismatched_parentheses(self):
-        query_text = "word0,((word1)),(word2,word3)"
-        specific_words = ["word0", "word1", "word2", "word3"]
-        anywhere_words = []
-
-        # Positive case 1: Exact match
-        text = "word0,((word1)),(word2,word3)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Positive case 2: Reordered sequence with the same delimiters
-        text = "(word2,word3),word0,((word1))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Positive case 3: Additional text in between but the delimiters remain the same
-        text = "word0,someExtraText,((word1)),someMoreText,(word2,word3)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, True)
-
-        # Extra closing parenthesis between word2 and word3
-        text = "word0,((word1),(word2,word3))"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
-
-        # Extra opening parenthesis between word1 and word2
-        text = "word0,((word1),((word2,word3)"
-        self.base_verify_func(query_text, text, anywhere_words, specific_words, False)
+        self.base_verify_func(query_text, text, specific_words, False)
 
     def test_wildcard_matching_verify_delimiters(self):
         query_text = "word0, ((word1.*?)), word2.*?"
@@ -257,14 +169,15 @@ def test_wildcard_matching_verify_delimiters(self):
 
         # Positive test cases
         text = "((word1)), word0, word2X"
-        self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+        self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
 
         text = "word0, ((word1Y)), word2Z"
-        self.assertTrue(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+        self.assertTrue(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
 
         # Negative test cases
         text = "word0, (word1), word2"
-        self.assertFalse(verify_search_delimiters(text, [], ["word0", "word1.*?", "word2.*?"], delimiter_map))
+        self.assertFalse(verify_search_delimiters(text, ["word0", "word1.*?", "word2.*?"], delimiter_map))
+
 
 class TestFindMatching(unittest.TestCase):
     def base_find_matching(self, series, search_string, expected):
@@ -272,6 +185,18 @@ def base_find_matching(self, series, search_string, expected):
         self.assertTrue(all(mask == expected), f"Expected {expected}, got {mask}")
 
     def test_basic_matching(self):
+        series = pd.Series([
+            "word0, word1, word2",
+            "word0, (word1, word2)"
+        ])
+        search_string = "word0, word1"
+        expected = pd.Series([True, True])
+        self.base_find_matching(series, search_string, expected)
+        search_string = "(word0, word1)"
+        expected = pd.Series([True, False])
+        self.base_find_matching(series, search_string, expected)
+
+    def test_group_matching(self):
         series = pd.Series([
             "(word1), word0, ((word2))",
             "word0, ((word1)), word2",
@@ -306,8 +231,110 @@ def test_wildcard_matching(self):
         series = pd.Series([
             "word2, word0, ((word1X))",
             "word0, ((word1Y)), word2Z",
+            "word0, ((word1)), word2",
             "word0, (word1), word2"
         ])
         search_string = "word0, ((word1*)), word2*"
-        expected = pd.Series([True, True, False])
+        expected = pd.Series([True, True, True, False])
+        self.base_find_matching(series, search_string, expected)
+
+    def test_complex_case_with_word_identifiers(self):
+        query_text = "word0, ((word1), @word2, @word3, word4)"
+        series = pd.Series([
+            "word0, ((word1), word2, word3, word4)",
+            "word2, word0, ((word1), word3, word4)",
+            "word3, ((word1), word2, word4), word0",
+            "word0, ((word1), word4), word2, word3",
+            "word0, word1, word4, word2",
+            "word2, word3"
+        ])
+        expected = pd.Series([True, True, True, True, False, False])
+
+        self.base_find_matching(series, query_text, expected)
+
+    def test_very_complex_case_with_word_identifiers(self):
+        query_text = "word0, (((word1, word2), @word3)), ((word4, word5)))"
+        series = pd.Series([
+            "word0, (((word1, word2), word3)), ((word4, word5)))",
+            "word3, word0, (((word1, word2))), ((word4, word5)))",
+            "word0, ((word1, word2), word3), (word4, word5)",
+            "word0, (((word1, word2), word3)), (word4)",
+            "word0, (((word1, word2))), ((word4, word5)))"
+        ])
+        expected = pd.Series([True, True, False, False, False])
+
+        self.base_find_matching(series, query_text, expected)
+
+    def test_incorrect_single_delimiter(self):
+        query_text = "word0, ((word1)), word2"
+        series = pd.Series([
+            "word0, ((word1)), word2",
+            "(word0, ((word1)), word2)",
+            "word0, ((word1), word2)",
+            "word0, (word1)), word2"
+        ])
+        expected = pd.Series([True, True, False, False])
+        self.base_find_matching(series, query_text, expected)
+
+    def test_mismatched_parentheses2(self):
+        query_text = "word0, ((word1)), (word2, word3)"
+        series = pd.Series([
+            "word0, ((word1)), (word2, word3)",
+            "(word2, word3), word0, ((word1))",
+            "word0, someExtraText, ((word1)), someMoreText, (word2, word3)",
+            "word0, ((word1), (word2, word3))",
+            "word0, ((word1), ((word2, word3)"
+        ])
+        expected = pd.Series([True, True, True, False, False])
+        self.base_find_matching(series, query_text, expected)
+
+    def test_negative_words(self):
+        series = pd.Series([
+            "word0, word1",
+            "word0, word2",
+            "word0, word2, word3",
+            "word0, (word1), word2",
+            "word0, (word2, word3), word1",
+            "word0, word1suffix",
+        ])
+
+        # 1. Basic Negative Test Case
+        search_string1 = "~word1, word0"
+        expected1 = pd.Series([False, True, True, False, False, True])
+
+        # 2. Two Negative Words
+        search_string2 = "~word1, ~word3, word0"
+        expected2 = pd.Series([False, True, False, False, False, True])
+
+        # 3. Combination of Negative and Mandatory Words
+        search_string3 = "@word2, ~word1, word0"
+        expected3 = pd.Series([False, True, True, False, False, False])
+
+        # 4. Negative Words with Wildcards
+        search_string4 = "word0, ~word1*"
+        expected4 = pd.Series([False, True, True, False, False, False])
+
+        # Running tests
+        self.base_find_matching(series, search_string1, expected1)
+        self.base_find_matching(series, search_string2, expected2)
+        self.base_find_matching(series, search_string3, expected3)
+        self.base_find_matching(series, search_string4, expected4)
+
+    def test_negative_words_group(self):
+        series = pd.Series([
+            "word0, (word1, (word2))",
+            "word0, (word1, (word2)), word3",
+            "word0, (word1, (word2), word3)",
+            "word0, (word1, (word2, word3))",
+        ])
+        search_string = "word0, (word1, (word2))"
+        expected = pd.Series([True, True, True, True])
+        self.base_find_matching(series, search_string, expected)
+
+        search_string = "word0, (word1, (word2)), ~word3"
+        expected = pd.Series([True, False, False, False])
+        self.base_find_matching(series, search_string, expected)
+
+        search_string = "word0, (word1, (word2), ~word3)"
+        expected = pd.Series([True, False, False, False])
         self.base_find_matching(series, search_string, expected)

From f02a1218f2c222901105ee4a5be97057ac2c3e6b Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Wed, 11 Oct 2023 12:09:24 -0500
Subject: [PATCH 06/21] Make validation row numbers 1-based, and account for
 header

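A sketch of the adjustment (illustrative only, mirroring the new row_adj
logic in spreadsheet_validator):

    row_adj = 1          # reported rows are 1-based rather than 0-based
    has_column_names = True
    if has_column_names:
        row_adj += 1     # the header occupies the first physical row
    # The first data row (dataframe index 0) is then reported as row 2.
    assert 0 + row_adj == 2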
---
 hed/validator/spreadsheet_validator.py | 18 ++++++++++++------
 tests/models/test_tabular_input.py     |  3 ++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py
index ebd64795..751af961 100644
--- a/hed/validator/spreadsheet_validator.py
+++ b/hed/validator/spreadsheet_validator.py
@@ -45,23 +45,28 @@ def validate(self, data, def_dicts=None, name=None, error_handler=None):
         self._hed_validator = HedValidator(self._schema, def_dicts=def_dicts)
         self._onset_validator = OnsetValidator()
         onset_filtered = None
+        # Adjust to account for 1-based row numbering
+        row_adj = 1
         if isinstance(data, BaseInput):
-            issues += self._validate_column_structure(data, error_handler)
+            # Adjust to account for column names
+            if data.has_column_names:
+                row_adj += 1
+            issues += self._validate_column_structure(data, error_handler, row_adj)
             onset_filtered = data.series_filtered
             data = data.dataframe_a
 
         # Check the rows of the input data
-        issues += self._run_checks(data, onset_filtered, error_handler=error_handler)
+        issues += self._run_checks(data, onset_filtered, error_handler=error_handler, row_adj=row_adj)
         error_handler.pop_error_context()
 
         issues = sort_issues(issues)
         return issues
 
-    def _run_checks(self, hed_df, onset_filtered, error_handler):
+    def _run_checks(self, hed_df, onset_filtered, error_handler, row_adj):
         issues = []
         columns = list(hed_df.columns)
         for row_number, text_file_row in enumerate(hed_df.itertuples(index=False)):
-            error_handler.push_error_context(ErrorContext.ROW, row_number)
+            error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
             row_strings = []
             new_column_issues = []
             for column_number, cell in enumerate(text_file_row):
@@ -100,13 +105,14 @@ def _run_checks(self, hed_df, onset_filtered, error_handler):
             error_handler.pop_error_context()
         return issues
 
-    def _validate_column_structure(self, base_input, error_handler):
+    def _validate_column_structure(self, base_input, error_handler, row_adj):
         """
         Validate that each column in the input data has valid values.
 
         Parameters:
             base_input (BaseInput): The input data to be validated.
             error_handler (ErrorHandler): Holds context
+            row_adj (int): Number to add to row indexes when reporting errors
         Returns:
             List of issues associated with each invalid value. Each issue is a dictionary.
         """
@@ -120,7 +126,7 @@ def _validate_column_structure(self, base_input, error_handler):
                 valid_keys = column.hed_dict.keys()
                 for row_number, value in enumerate(base_input.dataframe[column.column_name]):
                     if value != "n/a" and value not in valid_keys:
-                        error_handler.push_error_context(ErrorContext.ROW, row_number)
+                        error_handler.push_error_context(ErrorContext.ROW, row_number + row_adj)
                         issues += error_handler.format_error_with_context(ValidationErrors.SIDECAR_KEY_MISSING,
                                                                           invalid_key=value,
                                                                           category_keys=list(valid_keys))
diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py
index 7f5282e4..02ef32df 100644
--- a/tests/models/test_tabular_input.py
+++ b/tests/models/test_tabular_input.py
@@ -5,7 +5,7 @@
 from hed.models import DefinitionEntry, Sidecar, TabularInput
 from hed import schema
 from hed.errors import HedFileError
-from hed.errors import ErrorHandler
+from hed.errors import ErrorHandler, ErrorContext
 
 
 class Test(unittest.TestCase):
@@ -58,6 +58,7 @@ def test_expand_column_issues(self):
         input_file = TabularInput(events_path, sidecar=sidecar)
 
         validation_issues = input_file.validate(hed_schema=self.hed_schema)
+        self.assertEqual(validation_issues[0][ErrorContext.ROW], 2)
         self.assertEqual(len(validation_issues), 1)
 
     def test_blank_and_duplicate_columns(self):

From e9e63e39f6a99487c5e69ae96e653c4580045ec0 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Thu, 12 Oct 2023 10:29:43 -0500
Subject: [PATCH 07/21] Corrected an error in the formatting of text output of
 validation summary

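The core of the fix is rendering each issue to text up front; a sketch with a
stub issue list (real issues come from input_data.validate(schema)):

    from hed.errors import get_printable_issue_string

    issues = [{"code": "EXAMPLE_ISSUE", "message": "example message"}]
    printable = [get_printable_issue_string([issue], skip_filename=True)
                 for issue in issues]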
---
 hed/tools/remodeling/cli/run_remodel.py       | 18 +++++++++--------
 .../operations/summarize_hed_tags_op.py       |  2 ++
 .../operations/summarize_hed_validation_op.py | 20 +++++++++++--------
 tests/models/test_hed_group.py                |  2 +-
 4 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py
index 45fce71a..8aefdb15 100644
--- a/hed/tools/remodeling/cli/run_remodel.py
+++ b/hed/tools/remodeling/cli/run_remodel.py
@@ -142,21 +142,21 @@ def run_bids_ops(dispatch, args, tabular_files):
     dispatch.hed_schema = bids.schema
     if args.verbose:
         print(f"Successfully parsed BIDS dataset with HED schema {str(bids.schema.get_schema_versions())}")
-    events = bids.get_tabular_group(args.file_suffix)
+    data = bids.get_tabular_group(args.file_suffix)
     if args.verbose:
         print(f"Processing {dispatch.data_root}")
-    filtered_events = [events.datafile_dict[key] for key in tabular_files]
-    for events_obj in filtered_events:
-        sidecar_list = events.get_sidecars_from_path(events_obj)
+    filtered_events = [data.datafile_dict[key] for key in tabular_files]
+    for data_obj in filtered_events:
+        sidecar_list = data.get_sidecars_from_path(data_obj)
         if sidecar_list:
-            sidecar = events.sidecar_dict[sidecar_list[-1]].contents
+            sidecar = data.sidecar_dict[sidecar_list[-1]].contents
         else:
             sidecar = None
         if args.verbose:
-            print(f"Events {events_obj.file_path}  sidecar {sidecar}")
-        df = dispatch.run_operations(events_obj.file_path, sidecar=sidecar, verbose=args.verbose)
+            print(f"Tabular file {data_obj.file_path}  sidecar {sidecar}")
+        df = dispatch.run_operations(data_obj.file_path, sidecar=sidecar, verbose=args.verbose)
         if not args.no_update:
-            df.to_csv(events_obj.file_path, sep='\t', index=False, header=True)
+            df.to_csv(data_obj.file_path, sep='\t', index=False, header=True)
 
 
 def run_direct_ops(dispatch, args, tabular_files):
@@ -176,6 +176,8 @@ def run_direct_ops(dispatch, args, tabular_files):
     else:
         sidecar = None
     for file_path in tabular_files:
+        if args.verbose:
+            print(f"Tabular file {file_path}  sidecar {sidecar}")
         df = dispatch.run_operations(file_path, verbose=args.verbose, sidecar=sidecar)
         if not args.no_update:
             df.to_csv(file_path, sep='\t', index=False, header=True)
diff --git a/hed/tools/remodeling/operations/summarize_hed_tags_op.py b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
index ffef53fb..6e98abbe 100644
--- a/hed/tools/remodeling/operations/summarize_hed_tags_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_tags_op.py
@@ -87,6 +87,8 @@ def do_op(self, dispatcher, df, name, sidecar=None):
         if not summary:
             summary = HedTagSummary(self)
             dispatcher.summary_dicts[self.summary_name] = summary
         summary.update_summary({'df': dispatcher.post_proc_data(df_new), 'name': name,
                                 'schema': dispatcher.hed_schema, 'sidecar': sidecar})
         return df_new
diff --git a/hed/tools/remodeling/operations/summarize_hed_validation_op.py b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
index cd3fc936..a2948eb8 100644
--- a/hed/tools/remodeling/operations/summarize_hed_validation_op.py
+++ b/hed/tools/remodeling/operations/summarize_hed_validation_op.py
@@ -1,7 +1,7 @@
 """ Validate the HED tags in a dataset and report errors. """
 
 import os
-from hed.errors import ErrorSeverity, ErrorHandler
+from hed.errors import ErrorSeverity, ErrorHandler, get_printable_issue_string
 from hed.models.sidecar import Sidecar
 from hed.models.tabular_input import TabularInput
 from hed.tools.remodeling.operations.base_op import BaseOp
@@ -110,9 +110,9 @@ def _get_result_string(self, name, result, indent=BaseSummary.DISPLAY_INDENT):
         else:
             sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], indent=indent*2)
             if specifics['sidecar_had_issues']:
-                sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2)
+                sum_list = sum_list + self.get_error_list(specifics['sidecar_issues'], count_only=False, indent=indent*2)
             else:
-                sum_list = sum_list + [f"{indent*2}Event file validation was incomplete because of sidecar errors"]
+                sum_list = sum_list + self.get_error_list(specifics['event_issues'], count_only=False, indent=indent*2)
         return "\n".join(sum_list)
 
     def update_summary(self, new_info):
@@ -134,6 +134,7 @@ def update_summary(self, new_info):
             issues = input_data.validate(new_info['schema'])
             if not self.check_for_warnings:
                 issues = ErrorHandler.filter_issues_by_severity(issues, ErrorSeverity.ERROR)
+            issues = [get_printable_issue_string([issue], skip_filename=True) for issue in issues]
             results['event_issues'][new_info["name"]] = issues
             results['total_event_issues'] = len(issues)
         self.summary_dict[new_info["name"]] = results
@@ -197,13 +198,15 @@ def get_error_list(error_dict, count_only=False, indent=BaseSummary.DISPLAY_INDE
         error_list = []
         for key, item in error_dict.items():
             if count_only and isinstance(item, list):
-                error_list.append(f"{indent}{key}: {len(item)} issues")
+                error_list.append(f"{key}: {len(item)} issues")
             elif count_only:
-                error_list.append(f"{indent}{key}: {item} issues")
+                error_list.append(f"{key}: {item} issues")
             elif not len(item):
-                error_list.append(f"{indent}{key} has no issues")
+                error_list.append(f"{key} has no issues")
             else:
-                HedValidationSummary._format_errors(error_list, key, item, indent)
+                error_list.append(f"{key}:")
+                error_list = error_list + item
+                #HedValidationSummary._format_errors(error_list, key, item, indent)
         return error_list
 
     @staticmethod
@@ -246,6 +249,7 @@ def _get_sidecar_results(sidecar, new_info, check_for_warnings):
                 results["sidecar_had_issues"] = True
             if not check_for_warnings:
                 sidecar_issues = filtered_issues
-            results['sidecar_issues'][sidecar.name] = sidecar_issues
+            str_issues = [get_printable_issue_string([issue], skip_filename=True) for issue in sidecar_issues]
+            results['sidecar_issues'][sidecar.name] = str_issues
             results['total_sidecar_issues'] = len(sidecar_issues)
         return results
diff --git a/tests/models/test_hed_group.py b/tests/models/test_hed_group.py
index 117872e8..1a3632bf 100644
--- a/tests/models/test_hed_group.py
+++ b/tests/models/test_hed_group.py
@@ -62,7 +62,7 @@ def test_find_tags_with_term(self):
         # located tags now has found all 5 hed tags
 
         # This will find no tags
-        located_tags = basic_hed_string_obj.find_tags_with_term("bject", recursive=True, include_groups=0)
+        located_tags = basic_hed_string_obj.find_tags_with_term("reject", recursive=True, include_groups=0)
         self.assertEqual(len(located_tags), 0)
 
         # this will also find no tags

From 06fd31686b861ceccefb7d0b32122c3ad54cd328 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 15:56:50 -0500
Subject: [PATCH 08/21] Try bumping Python version / restricting non-master tests

---
 .github/workflows/ci.yaml | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 919e5c8f..8927786a 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -7,11 +7,26 @@ on:
     branches: ["*"]
 
 jobs:
+  determine_version:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - id: set-matrix
+        run: |
+          if [[ "${{ github.ref }}" == 'refs/heads/master' ]]; then
+            echo "::set-output name=matrix::[3.7, 3.9, 3.10, 3.11]"
+          else
+            echo "::set-output name=matrix::[3.9]"
+          end
+
   build:
+    needs: determine_version
     strategy:
       matrix:
         platform: [ubuntu-latest]
-        python-version: [3.7, 3.9]
+        python-version: ${{fromJson(needs.determine_version.outputs.matrix)}}
+
 
     runs-on: ${{ matrix.platform }}
 

From 8c844f9b03675d6b56e32584efca8dde875ec31c Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 15:59:27 -0500
Subject: [PATCH 09/21] Use correct loop end word

---
 .github/workflows/ci.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 8927786a..25625de2 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -18,7 +18,7 @@ jobs:
             echo "::set-output name=matrix::[3.7, 3.9, 3.10, 3.11]"
           else
             echo "::set-output name=matrix::[3.9]"
-          end
+          fi
 
   build:
     needs: determine_version
@@ -27,7 +27,6 @@ jobs:
         platform: [ubuntu-latest]
         python-version: ${{fromJson(needs.determine_version.outputs.matrix)}}
 
-
     runs-on: ${{ matrix.platform }}
 
     steps:

From 97b834189fff6078a19f05ce7c28d38f1d403291 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 16:03:44 -0500
Subject: [PATCH 10/21] Don't use semi-deprecated style

---
 .github/workflows/ci.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 25625de2..01a48a12 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,9 +15,9 @@ jobs:
       - id: set-matrix
         run: |
           if [[ "${{ github.ref }}" == 'refs/heads/master' ]]; then
-            echo "::set-output name=matrix::[3.7, 3.9, 3.10, 3.11]"
+            echo "matrix=[3.7, 3.9, 3.10, 3.11]" >> $GITHUB_OUTPUT
           else
-            echo "::set-output name=matrix::[3.9]"
+            echo "matrix=[3.9]" >> $GITHUB_OUTPUT
           fi
 
   build:

From 3fe606be962145b3cdb477092adf79ffc040b25b Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 16:13:19 -0500
Subject: [PATCH 11/21] Try to detect master differently

---
 .github/workflows/ci.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 01a48a12..25f09d8b 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -14,7 +14,11 @@ jobs:
     steps:
       - id: set-matrix
         run: |
-          if [[ "${{ github.ref }}" == 'refs/heads/master' ]]; then
+          if [[ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == 'refs/heads/master' ]]; then
+            # Push to master branch
+            echo "matrix=[3.7, 3.9, 3.10, 3.11]" >> $GITHUB_OUTPUT
+          elif [[ "${{ github.event_name }}" == 'pull_request' && "${{ github.event.pull_request.base.ref }}" == 'master' ]]; then
+            # PR to master branch
             echo "matrix=[3.7, 3.9, 3.10, 3.11]" >> $GITHUB_OUTPUT
           else
             echo "matrix=[3.9]" >> $GITHUB_OUTPUT

From 39c7487d2c7e5ea3453232daeb52839c69378368 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 16:19:34 -0500
Subject: [PATCH 12/21] Don't treat 3.10 as 3.1

---
 .github/workflows/ci.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 25f09d8b..ea78321f 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -16,12 +16,12 @@ jobs:
         run: |
           if [[ "${{ github.event_name }}" == 'push' && "${{ github.ref }}" == 'refs/heads/master' ]]; then
             # Push to master branch
-            echo "matrix=[3.7, 3.9, 3.10, 3.11]" >> $GITHUB_OUTPUT
+            echo 'matrix=["3.7", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT
           elif [[ "${{ github.event_name }}" == 'pull_request' && "${{ github.event.pull_request.base.ref }}" == 'master' ]]; then
             # PR to master branch
-            echo "matrix=[3.7, 3.9, 3.10, 3.11]" >> $GITHUB_OUTPUT
+            echo 'matrix=["3.7", "3.9", "3.10", "3.11"]' >> $GITHUB_OUTPUT
           else
-            echo "matrix=[3.9]" >> $GITHUB_OUTPUT
+            echo 'matrix=["3.9"]' >> $GITHUB_OUTPUT
           fi
 
   build:

From a762ba76a490a17f511cd90c79a7da9267fae912 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 18:35:51 -0500
Subject: [PATCH 13/21] Maybe suppress warning in 3.7 code

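Background sketch: on the older pandas pinned for Python 3.7, constructing a
Series without an explicit dtype can emit a DeprecationWarning (notably for
empty series, where the default dtype is changing). Being explicit avoids it:

    import pandas as pd

    empty = pd.Series([], dtype=str)
    filled = pd.Series(["n/a"] * 3, dtype=str)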
---
 hed/models/base_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hed/models/base_input.py b/hed/models/base_input.py
index 9f437102..58bb94c9 100644
--- a/hed/models/base_input.py
+++ b/hed/models/base_input.py
@@ -137,7 +137,7 @@ def _indexed_dict_from_onsets(onsets):
 
     @staticmethod
     def _filter_by_index_list(original_series, indexed_dict):
-        new_series = pd.Series(["n/a"] * len(original_series))
+        new_series = pd.Series(["n/a"] * len(original_series), dtype=str)
 
         for onset, indices in indexed_dict.items():
             if indices:

From 79f2d83f053fa0a6eb28cf62be3c7a50c360d229 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 12 Oct 2023 18:45:01 -0500
Subject: [PATCH 14/21] Fix test case, maybe

---
 tests/models/test_base_input.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/test_base_input.py b/tests/models/test_base_input.py
index 71e21386..02c7f34a 100644
--- a/tests/models/test_base_input.py
+++ b/tests/models/test_base_input.py
@@ -304,7 +304,7 @@ def test_complex_onsets(self):
                          {3.5: [0, 1], 4.0: [2], 4.4: [3, 4], -1.0: [5]})
 
     def test_empty_and_single_item_series(self):
-        self.assertTrue(BaseInput._filter_by_index_list(pd.Series([]), {}).equals(pd.Series([])))
+        self.assertTrue(BaseInput._filter_by_index_list(pd.Series([], dtype=str), {}).equals(pd.Series([], dtype=str)))
         self.assertTrue(BaseInput._filter_by_index_list(pd.Series(["apple"]), {0: [0]}).equals(pd.Series(["apple"])))
 
     def test_two_item_series_with_same_onset(self):

From cf5cedc597755af62b6bff90cbd7424bc5514480 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 13 Oct 2023 19:17:14 -0500
Subject: [PATCH 15/21] Initial codespell commit

---
 .github/workflows/codespell.yaml | 22 ++++++++++++++++++++++
 pyproject.toml                   |  4 ++++
 2 files changed, 26 insertions(+)
 create mode 100644 .github/workflows/codespell.yaml

diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
new file mode 100644
index 00000000..3deac45a
--- /dev/null
+++ b/.github/workflows/codespell.yaml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [develop]
+  pull_request:
+    branches: [develop]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 8bc13baf..f5549033 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,7 @@
 [build-system]
 requires = ["setuptools>=42", "versioneer-518"]
 build-backend = "setuptools.build_meta"
+
+[tool.codespell]
+skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld'
+ignore-words-list = 'te,parms'

From 9afb7b66e62bdd6b96ce9f97503f73a2fe811635 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Mon, 16 Oct 2023 14:23:02 -0500
Subject: [PATCH 16/21] Do spelling fixes, exclude spec_tests folder

---
 hed/models/column_mapper.py     | 2 +-
 hed/models/expression_parser.py | 2 +-
 pyproject.toml                  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py
index 761ab81a..e0948b9b 100644
--- a/hed/models/column_mapper.py
+++ b/hed/models/column_mapper.py
@@ -149,7 +149,7 @@ def _set_sidecar(self, sidecar):
             sidecar (Sidecar or None): the sidecar to use
 
         :raises ValueError:
-            - A sidecar was prevoiusly set
+            - A sidecar was previously set
         """
         if self._sidecar:
             raise ValueError("Trying to set a second sidecar on a column mapper.")
diff --git a/hed/models/expression_parser.py b/hed/models/expression_parser.py
index ab56aa76..736ff562 100644
--- a/hed/models/expression_parser.py
+++ b/hed/models/expression_parser.py
@@ -235,7 +235,7 @@ def handle_expr(self, hed_group, exact=False):
 class ExpressionOr(Expression):
     def handle_expr(self, hed_group, exact=False):
         groups1 = self.left.handle_expr(hed_group, exact=exact)
-        # Don't early out as we need to gather all groups incase tags appear more than once etc
+        # Don't early out as we need to gather all groups in case tags appear more than once etc
         groups2 = self.right.handle_expr(hed_group, exact=exact)
         # todo: optimize this eventually
         # Filter out duplicates
diff --git a/pyproject.toml b/pyproject.toml
index f5549033..7ba509cc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,5 +3,5 @@ requires = ["setuptools>=42", "versioneer-518"]
 build-backend = "setuptools.build_meta"
 
 [tool.codespell]
-skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld'
+skip = '*.git,*.pdf,*.xml,*.mediawiki,*.svg,versioneer.py,venv*,*.tsv,*.yaml,*.yml,*.json,*.rdf,*.jsonld,spec_tests'
 ignore-words-list = 'te,parms'

From 05aec79ecc82743ae8f7f7b72b1d93c12063dde6 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Mon, 16 Oct 2023 16:36:12 -0500
Subject: [PATCH 17/21] Fix issues Monique noticed in documentation

---
 hed/tools/remodeling/operations/remove_columns_op.py | 4 ++--
 hed/tools/remodeling/operations/rename_columns_op.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hed/tools/remodeling/operations/remove_columns_op.py b/hed/tools/remodeling/operations/remove_columns_op.py
index 267a7039..6901b6ce 100644
--- a/hed/tools/remodeling/operations/remove_columns_op.py
+++ b/hed/tools/remodeling/operations/remove_columns_op.py
@@ -6,8 +6,8 @@ class RemoveColumnsOp(BaseOp):
     """ Remove columns from a tabular file.
 
     Required remodeling parameters:
-        - **remove_names** (*list*): The names of the columns to be removed.  
-        - **ignore_missing** (*boolean*): If true, names in remove_names that are not columns in df should be ignored.  
+        - **column_names** (*list*): The names of the columns to be removed.
+        - **ignore_missing** (*boolean*): If true, names in column_names that are not columns in df should be ignored.
 
     """
 
diff --git a/hed/tools/remodeling/operations/rename_columns_op.py b/hed/tools/remodeling/operations/rename_columns_op.py
index 2a2f275a..0a3329b0 100644
--- a/hed/tools/remodeling/operations/rename_columns_op.py
+++ b/hed/tools/remodeling/operations/rename_columns_op.py
@@ -8,7 +8,7 @@ class RenameColumnsOp (BaseOp):
 
     Required remodeling parameters:   
         - **column_mapping** (*dict*): The names of the columns to be removed.   
-        - **ignore_missing** (*bool*): If true, the names in remove_names that are not columns and should be ignored.   
+        - **ignore_missing** (*bool*): If true, the names in column_mapping that are not columns and should be ignored.
 
     """
 

From 04acff8e2ebfbe39b91d68485c711f07b77a050d Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 20 Oct 2023 18:42:56 -0500
Subject: [PATCH 18/21] Improve toctree in documentation

---
 docs/source/_templates/autosummary/class.rst  | 34 ----------
 .../_templates/custom-class-template.rst      | 68 +++++++++++--------
 .../_templates/custom-module-template.rst     | 27 +++++---
 docs/source/index.rst                         |  4 +-
 4 files changed, 58 insertions(+), 75 deletions(-)
 delete mode 100644 docs/source/_templates/autosummary/class.rst

diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst
deleted file mode 100644
index 50efd957..00000000
--- a/docs/source/_templates/autosummary/class.rst
+++ /dev/null
@@ -1,34 +0,0 @@
-{{ fullname | escape | underline}}
-
-.. currentmodule:: {{ module }}
-
-.. autoclass:: {{ module }}.{{ objname }}
-   {% block methods %}
-   .. automethod:: __init__
-
-   {% if methods %}
-   .. rubric:: {{ _('Methods') }}
-
-   .. autosummary::
-   {% for item in methods %}
-      ~{{ name }}.{{ item }}
-   {%- endfor %}
-   {% endif %}
-   {% endblock %}
-
-   {% block attributes %}
-   {% if attributes %}
-   .. rubric:: {{ _('Attributes') }}
-
-   .. autosummary::
-   {% for item in attributes %}
-      ~{{ name }}.{{ item }}
-   {%- endfor %}
-   {% endif %}
-   {% endblock %}
-
-.. autoclass:: {{ module }}.{{ objname }}
-   :members:
-   :undoc-members:
-   :show-inheritance:
-   :inherited-members:
diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst
index 16ebb2f3..490f83ee 100644
--- a/docs/source/_templates/custom-class-template.rst
+++ b/docs/source/_templates/custom-class-template.rst
@@ -1,32 +1,42 @@
-{{ fullname | escape | underline}}
+{{ fullname | escape | underline }}
 
 .. currentmodule:: {{ module }}
 
-.. autoclass:: {{ objname }}
-   :members:
-   :show-inheritance:
-   :inherited-members:
-
-   {% block methods %}
-   .. automethod:: __init__
-
-   {% if methods %}
-   .. rubric:: {{ _('Methods') }}
-
-   .. autosummary::
-   {% for item in methods %}
-      ~{{ name }}.{{ item }}
-   {%- endfor %}
-   {% endif %}
-   {% endblock %}
-
-   {% block attributes %}
-   {% if attributes %}
-   .. rubric:: {{ _('Attributes') }}
-
-   .. autosummary::
-   {% for item in attributes %}
-      ~{{ name }}.{{ item }}
-   {%- endfor %}
-   {% endif %}
-   {% endblock %}
\ No newline at end of file
+.. autoclass:: {{ module }}.{{ objname }}
+   :noindex:
+
+.. rubric:: {{ _('Methods') }}
+
+.. autosummary::
+{% for item in methods %}
+   {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+.. rubric:: {{ _('Attributes') }}
+
+.. autosummary::
+{% for item in attributes %}
+   {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+.. toctree::
+   :hidden:
+
+{% for item in methods %}
+   {{ fullname }}#method-{{ item }}
+{%- endfor %}
+{% for item in attributes %}
+   {{ fullname }}#attribute-{{ item }}
+{%- endfor %}
+
+{% for item in methods %}
+.. _method-{{ item }}:
+
+.. automethod:: {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
+
+{% for item in attributes %}
+.. _attribute-{{ item }}:
+
+.. autoattribute:: {{ module }}.{{ objname }}.{{ item }}
+{%- endfor %}
diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst
index 74078355..7fec9f9c 100644
--- a/docs/source/_templates/custom-module-template.rst
+++ b/docs/source/_templates/custom-module-template.rst
@@ -1,5 +1,7 @@
 {{ fullname | escape | underline}}
 
+.. currentmodule:: {{ module }}
+
 .. automodule:: {{ fullname }}
 
    {% block attributes %}
@@ -14,17 +16,22 @@
    {% endif %}
    {% endblock %}
 
-   {% block functions %}
-   {% if functions %}
-   .. rubric:: {{ _('Functions') }}
+    {% block functions %}
+    {% if functions %}
+    .. rubric:: {{ _('Functions') }}
 
-   .. autosummary::
-      :toctree:
-   {% for item in functions %}
-      {{ item }}
-   {%- endfor %}
-   {% endif %}
-   {% endblock %}
+    .. autosummary::
+    {% for item in functions %}
+       {{ item }}
+    {% endfor %}
+
+    {% for item in functions %}
+    .. _{{ item }}:
+
+    .. autofunction:: {{ item }}
+    {% endfor %}
+    {% endif %}
+    {% endblock %}
 
    {% block classes %}
    {% if classes %}
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 924fb2b7..3b82987d 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,14 +14,14 @@ Hierarchical Event Descriptor (HED) Python Tools
 Note:  this is a work in progress. More information is coming.
 
 .. toctree::
-   :maxdepth: 4
+   :maxdepth: 6
    :caption: Contents:
 
    introduction.md
    user_guide.rst
 
 .. toctree::
-   :maxdepth: 4
+   :maxdepth: 6
    :caption: HED Python API:
 
    api2.rst

From 140676040e33242f3284bc6088c11e650709878a Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Fri, 20 Oct 2023 19:13:53 -0500
Subject: [PATCH 19/21] Don't draw full titles. Full names are still drawn
 in some other places
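
The new title expression keeps only the last dotted component of the
fullname; in plain Python the Jinja2 filter chain reduces to (illustrative
name only):

    >>> "hed.models.base_input.BaseInput".split('.')[-1]
    'BaseInput'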

---
 docs/source/_templates/custom-class-template.rst  | 2 +-
 docs/source/_templates/custom-module-template.rst | 2 +-
 docs/source/api2.rst                              | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/_templates/custom-class-template.rst b/docs/source/_templates/custom-class-template.rst
index 490f83ee..cf03326d 100644
--- a/docs/source/_templates/custom-class-template.rst
+++ b/docs/source/_templates/custom-class-template.rst
@@ -1,4 +1,4 @@
-{{ fullname | escape | underline }}
+{{ fullname.split('.')[-1] | escape | underline }}
 
 .. currentmodule:: {{ module }}
 
diff --git a/docs/source/_templates/custom-module-template.rst b/docs/source/_templates/custom-module-template.rst
index 7fec9f9c..9e9c8a77 100644
--- a/docs/source/_templates/custom-module-template.rst
+++ b/docs/source/_templates/custom-module-template.rst
@@ -1,4 +1,4 @@
-{{ fullname | escape | underline}}
+{{ fullname.split('.')[-1] | escape | underline}}
 
 .. currentmodule:: {{ module }}
 
diff --git a/docs/source/api2.rst b/docs/source/api2.rst
index 02d3aeae..1f78f469 100644
--- a/docs/source/api2.rst
+++ b/docs/source/api2.rst
@@ -1,5 +1,5 @@
-HED API reference (Auto style)
-==============================
+HED API reference
+=================
 
 .. currentmodule:: hed
 

From 3291b4c2bfb4b9c9b4d76076c14fffbc834e2525 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 23 Oct 2023 11:40:02 +0000
Subject: [PATCH 20/21] Bump actions/checkout from 3 to 4

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
---
 .github/workflows/codespell.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/codespell.yaml b/.github/workflows/codespell.yaml
index 3deac45a..9c6511f6 100644
--- a/.github/workflows/codespell.yaml
+++ b/.github/workflows/codespell.yaml
@@ -17,6 +17,6 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Codespell
         uses: codespell-project/actions-codespell@v2
\ No newline at end of file

From 9814e4ab58f3914a99fc2dc2ca1711ed690c5ee3 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Mon, 23 Oct 2023 11:01:33 -0500
Subject: [PATCH 21/21] Updated the remodeling backup arguments
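
A minimal sketch of the renamed flags, mirroring the updated tests (paths
are placeholders):

    from hed.tools.remodeling.cli.run_remodel_backup import main as backup_main
    from hed.tools.remodeling.cli.run_remodel_restore import main as restore_main

    # Create a backup named 'back1' in an explicit backup directory.
    backup_main(['/data/ds001', '-bn', 'back1', '-bd', '/backups/ds001'])

    # Restore the dataset from the same backup directory.
    restore_main(['/data/ds001', '-bn', 'back1', '-bd', '/backups/ds001'])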

---
 hed/tools/remodeling/backup_manager.py        |  7 +--
 hed/tools/remodeling/cli/run_remodel.py       |  8 ++-
 .../remodeling/cli/run_remodel_backup.py      | 18 +++----
 .../remodeling/cli/run_remodel_restore.py     | 12 ++---
 .../tools/remodeling/cli/test_run_remodel.py  | 14 ++---
 .../remodeling/cli/test_run_remodel_backup.py | 51 ++++++++++---------
 .../cli/test_run_remodel_restore.py           | 26 +++++-----
 tests/tools/remodeling/test_backup_manager.py |  2 +-
 8 files changed, 67 insertions(+), 71 deletions(-)

diff --git a/hed/tools/remodeling/backup_manager.py b/hed/tools/remodeling/backup_manager.py
index ac18f2f0..75c6f4f1 100644
--- a/hed/tools/remodeling/backup_manager.py
+++ b/hed/tools/remodeling/backup_manager.py
@@ -10,7 +10,7 @@
 
 class BackupManager:
     DEFAULT_BACKUP_NAME = 'default_back'
-    RELATIVE_BACKUP_LOCATION = 'derivatives/remodel'
+    RELATIVE_BACKUP_LOCATION = './derivatives/remodel/backups'
     BACKUP_DICTIONARY = 'backup_lock.json'
     BACKUP_ROOT = 'backup_root'
 
@@ -24,14 +24,15 @@ def __init__(self, data_root, backups_root=None):
         :raises HedFileError:
             - If the data_root does not correspond to a real directory.
 
+        Notes: If backups_root is not given, the default location [data_root]/derivatives/remodel/backups is used.
         """
         if not os.path.isdir(data_root):
             raise HedFileError('NonExistentData', f"{data_root} is not an existing directory", "")
         self.data_root = data_root
         if backups_root:
-            self.backups_path = os.path.join(backups_root, 'backups')
+            self.backups_path = backups_root
         else:
-            self.backups_path = os.path.join(data_root, self.RELATIVE_BACKUP_LOCATION, 'backups')
+            self.backups_path = os.path.join(data_root, self.RELATIVE_BACKUP_LOCATION)
         self.backups_path = os.path.realpath(self.backups_path)
         os.makedirs(self.backups_path, exist_ok=True)
         self.backups_dict = self._get_backups()
diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py
index 8aefdb15..32af02ea 100644
--- a/hed/tools/remodeling/cli/run_remodel.py
+++ b/hed/tools/remodeling/cli/run_remodel.py
@@ -20,6 +20,10 @@ def get_parser():
     parser = argparse.ArgumentParser(description="Converts event files based on a json file specifying operations.")
     parser.add_argument("data_dir", help="Full path of dataset root directory.")
     parser.add_argument("model_path", help="Full path of the file with remodeling instructions.")
+    parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+                        help="Directory for the backup that is being created")
+    parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+                        help="Name of the default backup for remodeling")
     parser.add_argument("-b", "--bids-format", action='store_true', dest="use_bids",
                         help="If present, the dataset is in BIDS format with sidecars. HED analysis is available.")
     parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
@@ -31,8 +35,8 @@ def get_parser():
                         help="Controls individual file summaries ('none', 'separate', 'consolidated')")
     parser.add_argument("-j", "--json-sidecar", dest="json_sidecar", nargs="?",
                         help="Optional path to JSON sidecar with HED information")
-    parser.add_argument("-n", "--backup-name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
-                        help="Name of the default backup for remodeling")
+#    parser.add_argument("-n", "--backup-name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+#                        help="Name of the default backup for remodeling")
     parser.add_argument("-nb", "--no-backup", action='store_true', dest="no_backup",
                         help="If present, the operations are run directly on the files with no backup.")
     parser.add_argument("-ns", "--no-summaries", action='store_true', dest="no_summaries",
diff --git a/hed/tools/remodeling/cli/run_remodel_backup.py b/hed/tools/remodeling/cli/run_remodel_backup.py
index 6d78465d..3754a15d 100644
--- a/hed/tools/remodeling/cli/run_remodel_backup.py
+++ b/hed/tools/remodeling/cli/run_remodel_backup.py
@@ -1,5 +1,6 @@
 """ Command-line program for creating a backup. """
 
+import os
 import argparse
 from hed.errors.exceptions import HedFileError
 from hed.tools.util.io_util import get_file_list, get_filtered_by_element
@@ -15,21 +16,18 @@ def get_parser():
     """
     parser = argparse.ArgumentParser(description="Creates a backup for the remodeling process.")
     parser.add_argument("data_dir", help="Full path of dataset root directory.")
+    parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+                        help="Directory for the backup that is being created")
+    parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+                        help="Name of the default backup for remodeling")
     parser.add_argument("-e", "--extensions", nargs="*", default=['.tsv'], dest="extensions",
                         help="File extensions to allow in locating files. A * indicates all files allowed.")
     parser.add_argument("-f", "--file-suffix", dest="file_suffix", nargs="*", default=['events'],
                         help="Filename suffix of files to be backed up. A * indicates all files allowed.")
-    parser.add_argument("-n", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
-                        help="Name of the default backup for remodeling")
-    parser.add_argument("-p", "--path-work", default="", dest="path_work",
-                        help="The root path for remodeling work if given, " +
-                             "otherwise [data_root]/derivatives/remodel is used.")
+
     parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The name of the task.")
     parser.add_argument("-v", "--verbose", action='store_true',
                         help="If present, output informative messages as computation progresses.")
-    parser.add_argument("-w", "--work-dir", default="", dest="work_dir",
-                        help="If given, is the path to directory for saving, " +
-                             "otherwise [data_root]derivatives/remodel is used.")
     parser.add_argument("-x", "--exclude-dirs", nargs="*", default=['derivatives'], dest="exclude_dirs",
                         help="Directories names to exclude from search for files. " +
                              "If omitted, no directories except the backup directory will be excluded." +
@@ -60,8 +58,8 @@ def main(arg_list=None):
                               exclude_dirs=exclude_dirs)
     if args.task_names:
         file_list = get_filtered_by_element(file_list, args.task_names)
-    if args.work_dir:
-        backups_root = args.work_dir
+    if args.backup_dir:
+        backups_root = args.backup_dir
     else:
         backups_root = None
     backup_man = BackupManager(args.data_dir, backups_root=backups_root)
diff --git a/hed/tools/remodeling/cli/run_remodel_restore.py b/hed/tools/remodeling/cli/run_remodel_restore.py
index 960bd091..72ba0c3c 100644
--- a/hed/tools/remodeling/cli/run_remodel_restore.py
+++ b/hed/tools/remodeling/cli/run_remodel_restore.py
@@ -14,15 +14,13 @@ def get_parser():
     """
     parser = argparse.ArgumentParser(description="Restores the backup files for the original data.")
     parser.add_argument("data_dir", help="Full path of dataset root directory.")
-    parser.add_argument("-n", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
+    parser.add_argument("-bd", "--backup_dir", default="", dest="backup_dir",
+                        help="Directory for the backup that is being created")
+    parser.add_argument("-bn", "--backup_name", default=BackupManager.DEFAULT_BACKUP_NAME, dest="backup_name",
                         help="Name of the default backup for remodeling")
-
     parser.add_argument("-t", "--task-names", dest="task_names", nargs="*", default=[], help="The names of the task.")
     parser.add_argument("-v", "--verbose", action='store_true',
                         help="If present, output informative messages as computation progresses.")
-    parser.add_argument("-w", "--work_dir", default="", dest="work_dir",
-                        help="The root path for remodeling work if given, " +
-                             "otherwise [data_root]/derivatives/remodel is used.")
     return parser
 
 
@@ -39,8 +37,8 @@ def main(arg_list=None):
     """
     parser = get_parser()
     args = parser.parse_args(arg_list)
-    if args.work_dir:
-        backups_root = args.work_dir
+    if args.backup_dir:
+        backups_root = args.backup_dir
     else:
         backups_root = None
     backup_man = BackupManager(args.data_dir, backups_root=backups_root)
diff --git a/tests/tools/remodeling/cli/test_run_remodel.py b/tests/tools/remodeling/cli/test_run_remodel.py
index a63b5be2..1d2f4b91 100644
--- a/tests/tools/remodeling/cli/test_run_remodel.py
+++ b/tests/tools/remodeling/cli/test_run_remodel.py
@@ -51,7 +51,7 @@ def tearDownClass(cls):
 
     def test_parse_arguments(self):
         # Test no verbose
-        arg_list1 = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1']
+        arg_list1 = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1']
         with patch('sys.stdout', new=io.StringIO()) as fp1:
             args1, operations1 = parse_arguments(arg_list1)
             self.assertFalse(fp1.getvalue())
@@ -60,7 +60,7 @@ def test_parse_arguments(self):
         self.assertEqual(args1.file_suffix, 'events')
 
         # Test * for extensions and suffix as well as verbose
-        arg_list2 = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1', '-f', '*', '-e', '*', '-v']
+        arg_list2 = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1', '-f', '*', '-e', '*', '-v']
         with patch('sys.stdout', new=io.StringIO()) as fp2:
             args2, operations2 = parse_arguments(arg_list2)
             self.assertTrue(fp2.getvalue())
@@ -164,13 +164,13 @@ def test_main_bids_no_sidecar_with_hed_task(self):
 
     def test_main_errors(self):
         # Test bad data directory
-        arg_list = ['junk/junk', self.model_path, '-x', 'derivatives', '-n', 'back1']
+        arg_list = ['junk/junk', self.model_path, '-x', 'derivatives', '-bn', 'back1']
         with self.assertRaises(HedFileError) as context:
             main(arg_list=arg_list)
         self.assertEqual(context.exception.args[0], "DataDirectoryDoesNotExist")
 
         # Test no backup
-        arg_list = [self.data_root, self.model_path, '-x', 'derivatives', '-n', 'back1']
+        arg_list = [self.data_root, self.model_path, '-x', 'derivatives', '-bn', 'back1']
         with self.assertRaises(HedFileError) as context:
             main(arg_list=arg_list)
         self.assertEqual(context.exception.args[0], "BackupDoesNotExist")
@@ -193,12 +193,6 @@ def test_run_bids_ops_verbose(self):
             main(arg_list)
             self.assertFalse(fp.getvalue())
 
-    # def test_temp(self):
-    #     data_root = "g:/ds002718OpenNeuro"
-    #     model_path = 'G:/wh_excerpt_rmdl.json'
-    #     arg_list = [data_root, model_path, '-x', 'derivatives', 'code', 'stimuli', '-b', '-n', '']
-    #     main(arg_list)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tools/remodeling/cli/test_run_remodel_backup.py b/tests/tools/remodeling/cli/test_run_remodel_backup.py
index 65f2391b..2dbf2770 100644
--- a/tests/tools/remodeling/cli/test_run_remodel_backup.py
+++ b/tests/tools/remodeling/cli/test_run_remodel_backup.py
@@ -15,7 +15,9 @@ class Test(unittest.TestCase):
     def setUpClass(cls):
         file_list = ['top_level.tsv', 'sub1/sub1_events.tsv', 'sub2/sub2_events.tsv', 'sub2/sub2_next_events.tsv']
         # cls.file_list = file_list
-        cls.extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+        extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+        cls.alt_path = os.path.realpath(os.path.join(extract_path, 'temp'))
+        cls.extract_path = extract_path
         test_root = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests/test_root')
         cls.test_root = test_root
         cls.test_paths = [os.path.join(test_root, file) for file in file_list]
@@ -34,8 +36,12 @@ def setUp(self):
             zip_ref.extractall(self.extract_path)
 
     def tearDown(self):
-        shutil.rmtree(self.test_root)
-        shutil.rmtree(self.data_root)
+        if os.path.exists(self.test_root):
+            shutil.rmtree(self.test_root)
+        if os.path.exists(self.data_root):
+            shutil.rmtree(self.data_root)
+        if os.path.exists(self.alt_path):
+            shutil.rmtree(self.alt_path)
 
     @classmethod
     def tearDownClass(cls):
@@ -43,11 +49,11 @@ def tearDownClass(cls):
 
     def test_main_events(self):
         self.assertFalse(os.path.exists(self.derv_path), 'backup directory does not exist before creation')
-        arg_list = [self.test_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+        arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path, '-x', 'derivatives',
                     '-f', 'events', '-e', '.tsv']
         main(arg_list)
         self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists before creation')
-        json_path = os.path.realpath(os.path.join(self.derv_path, 'backups', BackupManager.DEFAULT_BACKUP_NAME,
+        json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME,
                                                   BackupManager.BACKUP_DICTIONARY))
         with open(json_path, 'r') as fp:
             key_dict = json.load(fp)
@@ -56,19 +62,18 @@ def test_main_events(self):
         self.assertEqual(len(file_list), 3, "The backup of events.tsv has the right number of files")
 
     def test_main_all(self):
-        arg_list = [self.test_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
-                    '-f', '*', '-e', '*']
+        arg_list = [self.test_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-bd', self.derv_path,
+                    '-x', 'derivatives', '-f', '*', '-e', '*']
 
         self.assertFalse(os.path.exists(self.derv_path), 'backup directory does not exist before creation')
         main(arg_list)
         self.assertTrue(os.path.exists(self.derv_path), 'backup directory exists before creation')
-        json_path = os.path.realpath(os.path.join(self.derv_path, 'backups', BackupManager.DEFAULT_BACKUP_NAME,
+        json_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME,
                                                   BackupManager.BACKUP_DICTIONARY))
         with open(json_path, 'r') as fp:
             key_dict = json.load(fp)
         self.assertEqual(len(key_dict), 4, "The backup of events.tsv does not include top_level.tsv")
-        back_path = os.path.realpath(os.path.join(self.derv_path, 'backups', 
-                                                  BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
+        back_path = os.path.realpath(os.path.join(self.derv_path, BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
         file_list1 = get_file_list(back_path)
         self.assertIsInstance(file_list1, list)
         self.assertEqual(len(file_list1), 4)
@@ -78,11 +83,11 @@ def test_main_task(self):
         self.assertTrue(os.path.exists(der_path))
         shutil.rmtree(der_path)
         self.assertFalse(os.path.exists(der_path))
-        arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+        arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
                     '-f', 'events', '-e', '.tsv', '-t', 'FacePerception']
         main(arg_list)
         self.assertTrue(os.path.exists(der_path))
-        back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups',
+        back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION,
                                                   BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
         self.assertTrue(os.path.exists(back_path))
         backed_files = get_file_list(back_path)
@@ -93,37 +98,33 @@ def test_main_bad_task(self):
         self.assertTrue(os.path.exists(der_path))
         shutil.rmtree(der_path)
         self.assertFalse(os.path.exists(der_path))
-        arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+        arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
                     '-f', 'events', '-e', '.tsv', '-t', 'Baloney']
         main(arg_list)
         self.assertTrue(os.path.exists(der_path))
-        back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups',
+        back_path = os.path.realpath(os.path.join(self.data_root, BackupManager.RELATIVE_BACKUP_LOCATION,
                                                   BackupManager.DEFAULT_BACKUP_NAME, 'backup_root'))
         self.assertTrue(os.path.exists(back_path))
         backed_files = get_file_list(back_path)
         self.assertEqual(len(backed_files), 0)
 
     def test_alt_loc(self):
-        alt_path = os.path.realpath(os.path.join(self.extract_path, 'temp'))
-        if os.path.exists(alt_path):
-            shutil.rmtree(alt_path)
-        self.assertFalse(os.path.exists(alt_path))
-        arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives', '-w', alt_path,
+        if os.path.exists(self.alt_path):
+            shutil.rmtree(self.alt_path)
+        self.assertFalse(os.path.exists(self.alt_path))
+        arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives', '-bd', self.alt_path,
                     '-f', 'events', '-e', '.tsv', ]
         main(arg_list)
-        self.assertTrue(os.path.exists(alt_path))
-        back_path = os.path.realpath(os.path.join(alt_path, 'backups', 'default_back', 'backup_root'))
-        self.assertTrue(os.path.exists(back_path))
+        self.assertTrue(os.path.exists(self.alt_path))
+        back_path = os.path.realpath(os.path.join(self.alt_path, 'default_back', 'backup_root'))
         self.assertTrue(os.path.exists(back_path))
         backed_files = get_file_list(back_path)
         self.assertEqual(len(backed_files), 6)
-        if os.path.exists(alt_path):
-            shutil.rmtree(alt_path)
 
     def test_main_backup_exists(self):
         der_path = os.path.realpath(os.path.join(self.data_root, 'derivatives'))
         self.assertTrue(os.path.exists(der_path))
-        arg_list = [self.data_root, '-n', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
+        arg_list = [self.data_root, '-bn', BackupManager.DEFAULT_BACKUP_NAME, '-x', 'derivatives',
                     '-f', 'events', '-e', '.tsv', '-t', 'Baloney']
         with self.assertRaises(HedFileError) as context:
             main(arg_list)
diff --git a/tests/tools/remodeling/cli/test_run_remodel_restore.py b/tests/tools/remodeling/cli/test_run_remodel_restore.py
index c3645f3a..af75f1d2 100644
--- a/tests/tools/remodeling/cli/test_run_remodel_restore.py
+++ b/tests/tools/remodeling/cli/test_run_remodel_restore.py
@@ -17,15 +17,19 @@ def setUpClass(cls):
                                            '../../../data/remodel_tests/test_root_back1')
         cls.test_zip_back1 = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                           '../../../data/remodel_tests/test_root_back1.zip')
-
-        cls.extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+        extract_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../data/remodel_tests')
+        cls.alt_path = os.path.realpath(os.path.join(extract_path, 'temp'))
+        cls.extract_path = extract_path
 
     def setUp(self):
         with zipfile.ZipFile(self.test_zip_back1, 'r') as zip_ref:
             zip_ref.extractall(self.extract_path)
 
     def tearDown(self):
-        shutil.rmtree(self.test_root_back1)
+        if os.path.exists(self.test_root_back1):
+            shutil.rmtree(self.test_root_back1)
+        if os.path.exists(self.alt_path):
+            shutil.rmtree(self.alt_path)
 
     def test_main_restore(self):
         files1 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
@@ -35,7 +39,7 @@ def test_main_restore(self):
         os.remove(os.path.realpath(os.path.join(self.test_root_back1, 'top_level.tsv')))
         files2 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
         self.assertFalse(files2, "run_restore starts with the right number of files.")
-        arg_list = [self.test_root_back1, '-n', 'back1']
+        arg_list = [self.test_root_back1, '-bn', 'back1']
         main(arg_list)
         files3 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
         self.assertEqual(len(files3), len(files1), "run_restore restores all the files after")
@@ -48,11 +52,10 @@ def test_no_backup(self):
         self.assertEqual(context.exception.args[0], "BackupDoesNotExist")
 
     def test_restore_alt_loc(self):
-        alt_path = os.path.realpath(os.path.join(self.extract_path, 'temp'))
-        if os.path.exists(alt_path):
-            shutil.rmtree(alt_path)
-        self.assertFalse(os.path.exists(alt_path))
-        arg_list = [self.test_root_back1, '-n', 'back1', '-x', 'derivatives', '-w', alt_path,
+        if os.path.exists(self.alt_path):
+            shutil.rmtree(self.alt_path)
+        self.assertFalse(os.path.exists(self.alt_path))
+        arg_list = [self.test_root_back1, '-bn', 'back1', '-x', 'derivatives', '-bd', self.alt_path,
                     '-f', 'events', '-e', '.tsv']
         back_main(arg_list)
         files1 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
@@ -62,14 +65,11 @@ def test_restore_alt_loc(self):
         os.remove(os.path.realpath(os.path.join(self.test_root_back1, 'top_level.tsv')))
         files2 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
         self.assertFalse(files2, "run_restore starts with the right number of files.")
-        arg_list = [self.test_root_back1, '-n', 'back1', '-w',  alt_path]
+        arg_list = [self.test_root_back1, '-bn', 'back1', '-bd', self.alt_path]
         main(arg_list)
         files3 = get_file_list(self.test_root_back1, exclude_dirs=['derivatives'])
         self.assertEqual(len(files3)+1, len(files1), "run_restore restores all the files after")
 
-        if os.path.exists(alt_path):
-            shutil.rmtree(alt_path)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tools/remodeling/test_backup_manager.py b/tests/tools/remodeling/test_backup_manager.py
index 53c297cf..1d9a50e5 100644
--- a/tests/tools/remodeling/test_backup_manager.py
+++ b/tests/tools/remodeling/test_backup_manager.py
@@ -32,7 +32,7 @@ def setUpClass(cls):
         test_root_bad = os.path.realpath(os.path.join(os.path.dirname(__file__),
                                                       '../../data/remodel_tests/test_root_bad'))
         cls.test_root_bad = test_root_bad
-        cls.test_root_bad_backups = os.path.join(test_root_bad, BackupManager.RELATIVE_BACKUP_LOCATION, 'backups')
+        cls.test_root_bad_backups = os.path.join(test_root_bad, BackupManager.RELATIVE_BACKUP_LOCATION)
         cls.test_paths_bad = [os.path.join(test_root_bad, file) for file in file_list]
         cls.test_zip_bad = os.path.realpath(os.path.join(os.path.dirname(__file__),
                                                          '../../data/remodel_tests/test_root_bad.zip'))