diff --git a/hed/models/basic_search.py b/hed/models/basic_search.py index ae47b71e..9301a0cc 100644 --- a/hed/models/basic_search.py +++ b/hed/models/basic_search.py @@ -8,14 +8,18 @@ def find_matching(series, search_string, regex=False): """ Finds lines in the series that match the search string and returns a mask. Syntax Rules: - - '@': Prefixing a term in the search string means the object must appear anywhere within a line. + - '@': Prefixing a term in the search string means the term must appear anywhere within a line. + - '~': Prefixing a term in the search string means the term must NOT appear within a line. - Parentheses: Elements within parentheses must appear in the line with the same level of nesting. - eg: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't - start in the same group. + e.g.: Search string: "(A), (B)" will match "(A), (B, C)", but not "(A, B)", since they don't + start in the same group. - "LongFormTag*": A * will match any remaining word(anything but a comma or parenthesis) - An individual term can be arbitrary regex, but it is limited to single continuous words. Notes: + - Specific words only care about their level relative to other specific words, not overall. + e.g. "(A, B)" will find: "A, B", "(A, B)", "(A, (C), B)", or "((A, B))" + - If you have no grouping or anywhere words in the search, it assumes all terms are anywhere words. - The format of the series should match the format of the search string, whether it's in short or long form. - To enable support for matching parent tags, ensure that both the series and search string are in long form. @@ -33,19 +37,19 @@ def find_matching(series, search_string, regex=False): if not regex: # Replace *'s with a reasonable value for people who don't know regex search_string = re.sub(r'(?