From 392a8814a3e96b6762611f7c8eae4669087f3de5 Mon Sep 17 00:00:00 2001
From: IanCa <ianrcallanan@gmail.com>
Date: Thu, 8 Feb 2024 17:11:41 -0600
Subject: [PATCH] Completely re-write schema comparison summary. Now generates a
 changelog based on the Major/Minor/Patch changes. Fix issues with name for
 withStandard schema. Improve comparison between schema entries to handle
 inherited attributes.

---
 hed/schema/hed_schema_entry.py                |  22 +-
 hed/schema/schema_compare.py                  | 509 ++++++++----------
 hed/schema/schema_io/base2schema.py           |   1 +
 .../schema_tests/schema_compare.mediawiki     |  51 ++
 .../schema_tests/schema_compare2.mediawiki    |  62 +++
 tests/models/test_basic_search.py             |  27 +-
 tests/schema/test_schema_compare.py           | 106 ++--
 7 files changed, 427 insertions(+), 351 deletions(-)
 create mode 100644 tests/data/schema_tests/schema_compare.mediawiki
 create mode 100644 tests/data/schema_tests/schema_compare2.mediawiki

diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
index 4be137da..3ad99b2e 100644
--- a/hed/schema/hed_schema_entry.py
+++ b/hed/schema/hed_schema_entry.py
@@ -115,13 +115,8 @@ def section_key(self):
     def __eq__(self, other):
         if self.name != other.name:
             return False
-        if self.attributes != other.attributes:
-            # We only want to compare known attributes
-            self_attr = self.get_known_attributes()
-            other_attr = other.get_known_attributes()
-            # We can no longer be sure on the order of attribute values, since owl formatting has no order
-            if self_attr != other_attr and not self._compare_attributes_no_order(self_attr, other_attr):
-                return False
+        if not self._compare_attributes_no_order(self.attributes, other.attributes):
+            return False
         if self.description != other.description:
             return False
         return True
@@ -138,8 +133,9 @@ def get_known_attributes(self):
 
     @staticmethod
     def _compare_attributes_no_order(left, right):
-        left = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in left.items()}
-        right = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in right.items()}
+        if left != right:
+            left = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in left.items()}
+            right = {name: (set(value.split(",")) if isinstance(value, str) else value) for (name, value) in right.items()}
 
         return left == right
 
@@ -235,6 +231,7 @@ def get_conversion_factor(self, unit_name):
         if HedKey.ConversionFactor in self.attributes:
             return float(self.derivative_units.get(unit_name))
 
+
 class HedTagEntry(HedSchemaEntry):
     """ A single tag entry in the HedSchema. """
     def __init__(self, *args, **kwargs):
@@ -252,6 +249,13 @@ def __init__(self, *args, **kwargs):
         # Descendent tags below this one
         self.children = {}
 
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        if not self._compare_attributes_no_order(self.inherited_attributes, other.inherited_attributes):
+            return False
+        return True
+
     def has_attribute(self, attribute, return_value=False):
         """ Returns th existence or value of an attribute in this entry.
 
diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py
index f128306d..b2d4455c 100644
--- a/hed/schema/schema_compare.py
+++ b/hed/schema/schema_compare.py
@@ -1,7 +1,7 @@
 from hed.schema.hed_schema import HedSchema, HedKey
 from hed.schema.hed_schema_constants import HedSectionKey
+from collections import defaultdict
 
-# This is still in design, means header attributes, epilogue, and prologue
 MiscSection = "misc"
 
 SectionEntryNames = {
@@ -12,6 +12,7 @@
     HedSectionKey.UnitModifiers: "Unit Modifier",
     HedSectionKey.Properties: "Property",
     HedSectionKey.Attributes: "Attribute",
+    MiscSection: "Misc Metadata"
 }
 
 SectionEntryNamesPlural = {
@@ -22,154 +23,64 @@
     HedSectionKey.UnitModifiers: "Unit Modifiers",
     HedSectionKey.Properties: "Properties",
     HedSectionKey.Attributes: "Attributes",
+    MiscSection: "Misc Metadata"
 }
 
 
-def find_matching_tags(schema1, schema2, output='raw', sections=(HedSectionKey.Tags,),
-                       include_summary=True):
-    """
-    Compare the tags in two library schemas.  This finds tags with the same term.
+def find_matching_tags(schema1, schema2, sections=(HedSectionKey.Tags,), return_string=True):
+    """Compare the tags in two library schemas.  This finds tags with the same term.
 
     Parameters:
         schema1 (HedSchema): The first schema to be compared.
         schema2 (HedSchema): The second schema to be compared.
-        output (str): Defaults to returning a python object dicts.
-                      'string' returns a single string
-                      'dict' returns a json style dictionary
         sections(list): the list of sections to compare.  By default, just the tags section.
                         If None, checks all sections including header, prologue, and epilogue.
-        include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prints it with the
-                               string option.  Lists the names of all the nodes that are missing or different.
+        return_string(bool): If False, returns the raw python dictionary (for possible use by tools, etc.).
     Returns:
-        dict, json style dict, or str: A dictionary containing matching entries in the Tags section of both schemas.
+        str or dict: Returns a formatted string or python dict
     """
     matches, _, _, unequal_entries = compare_schemas(schema1, schema2, sections=sections)
+    header_summary = _get_tag_name_summary((matches, unequal_entries))
 
+    # Combine the two dictionaries
     for section_key, section_dict in matches.items():
         section_dict.update(unequal_entries[section_key])
 
-    header_summary = _get_tag_name_summary((matches, unequal_entries))
-
-    if output == 'string':
-        final_string = ""
-        if include_summary:
-            final_string += _pretty_print_header(header_summary)
-        if sections is None:
-            sections = HedSectionKey
-        for section_key in sections:
-            type_name = SectionEntryNames[section_key]
-            entries = matches[section_key]
-            if not entries:
-                continue
-            final_string += f"{type_name} differences:\n"
-            final_string += _pretty_print_diff_all(entries, type_name=type_name) + "\n"
+    if return_string:
+        final_string = "Nodes with matching names:\n"
+        final_string += _pretty_print_header(header_summary)
+        # Do we actually want this...?  I'm just going to remove and add back later if needed.
+        # for section_key, entries in matches.items():
+        #     type_name = SectionEntryNames[section_key]
+        #     if not entries:
+        #         continue
+        #     final_string += f"{type_name} differences:\n"
+        #     final_string += _pretty_print_diff_all(entries, type_name=type_name) + "\n"
         return final_string
-    elif output == 'dict':
-        output_dict = {}
-        if include_summary:
-            output_dict["summary"] = {str(key): value for key, value in header_summary.items()}
-
-        for section_name, section_entries in matches.items():
-            output_dict[str(section_name)] = {}
-            for key, (entry1, entry2) in section_entries.items():
-                output_dict[str(section_name)][key] = _dict_diff_entries(entry1, entry2)
-        return output_dict
     return matches
 
 
-def compare_differences(schema1, schema2, output='raw', attribute_filter=None, sections=(HedSectionKey.Tags,),
-                        include_summary=True):
-    """
-    Compare the tags in two schemas, this finds any differences
-
-    Parameters:
-        schema1 (HedSchema): The first schema to be compared.
-        schema2 (HedSchema): The second schema to be compared.
-        output (str): 'raw' (default) returns a tuple of python object dicts with raw results.
-                      'string' returns a single string
-                      'dict' returns a json-style python dictionary that can be converted to JSON
-        attribute_filter (str, optional): The attribute to filter entries by.
-                                          Entries without this attribute are skipped.
-                                          The most common use would be HedKey.InLibrary
-                                          If it evaluates to False, no filtering is performed.
-        sections(list or None): the list of sections to compare.  By default, just the tags section.
-                If None, checks all sections including header, prologue, and epilogue.
-        include_summary(bool): If True, adds the 'summary' dict to the dict return option, and prints it with the
-                               string option.  Lists the names of all the nodes that are missing or different.
-
-    Returns:
-        tuple, str or dict: 
-        - Tuple with dict entries (not_in_schema1, not_in_schema1, unequal_entries).
-        - Formatted string with the output ready for printing.
-        - A Python dictionary with the output ready to be converted to JSON (for web output).
+def _pretty_print_header(summary_dict):
+    output_string = ""
+    first_entry = True
+    for section_key, tag_names in summary_dict.items():
+        if not tag_names:
+            continue
+        type_name = SectionEntryNamesPlural[section_key]
+        if not first_entry:
+            output_string += "\n"
+        output_string += f"{type_name}: "
 
-    Notes: The underlying dictionaries are:
-        - not_in_schema1(dict): Entries present in schema2 but not in schema1.
-        - not_in_schema2(dict): Entries present in schema1 but not in schema2.
-        - unequal_entries(dict): Entries that differ between the two schemas.
+        output_string += ", ".join(sorted(tag_names))
 
-    """
-    _, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter,
-                                                             sections=sections)
-
-    if sections is None:
-        sections = HedSectionKey
-
-    header_summary = _get_tag_name_summary((not_in_1, not_in_2, unequal_entries))
-    if output == 'string':
-        final_string = ""
-        if include_summary:
-            final_string += _pretty_print_header(header_summary)
-            if not final_string:
-                return final_string
-            final_string = ("Overall summary:\n================\n" + final_string + \
-                            "\n\n\nSummary details:\n================\n\n")
-        for section_key in sections:
-            val1, val2, val3 = unequal_entries[section_key], not_in_1[section_key], not_in_2[section_key]
-            type_name = SectionEntryNames[section_key]
-            if val1 or val2 or val3:
-                final_string += f"{type_name} differences:\n"
-                if val1:
-                    final_string += _pretty_print_diff_all(val1, type_name=type_name) + "\n"
-                if val2:
-                    final_string += _pretty_print_missing_all(val2, "Schema1", type_name) + "\n"
-                if val3:
-                    final_string += _pretty_print_missing_all(val3, "Schema2", type_name) + "\n"
-                final_string += "\n\n"
-        return final_string
-    elif output == 'dict':
-        # todo: clean this part up
-        output_dict = {}
-        current_section = {}
-        if include_summary:
-            output_dict["summary"] = {str(key): value for key, value in header_summary.items()}
-
-        output_dict["unequal"] = current_section
-        for section_name, section_entries in unequal_entries.items():
-            current_section[str(section_name)] = {}
-            for key, (entry1, entry2) in section_entries.items():
-                current_section[str(section_name)][key] = _dict_diff_entries(entry1, entry2)
-
-        current_section = {}
-        output_dict["not_in_1"] = current_section
-        for section_name, section_entries in not_in_1.items():
-            current_section[str(section_name)] = {}
-            for key, entry in section_entries.items():
-                current_section[str(section_name)][key] = _entry_to_dict(entry)
-
-        current_section = {}
-        output_dict["not_in_2"] = current_section
-        for section_name, section_entries in not_in_2.items():
-            current_section[str(section_name)] = {}
-            for key, entry in section_entries.items():
-                current_section[str(section_name)][key] = _entry_to_dict(entry)
-        return output_dict
-    return not_in_1, not_in_2, unequal_entries
+        output_string += "\n"
+        first_entry = False
+    return output_string
 
 
 def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, sections=(HedSectionKey.Tags,)):
-    """
-    Compare two schemas section by section.
+    """ Compare two schemas section by section.
+
     The function records matching entries, entries present in one schema but not in the other, and unequal entries.
 
     Parameters:
@@ -179,7 +90,7 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
             Entries without this attribute are skipped.
             The most common use would be HedKey.InLibrary
             If it evaluates to False, no filtering is performed.
-        sections(list): the list of sections to compare.  By default, just the tags section.
+        sections(list or None): the list of sections to compare.  By default, just the tags section.
             If None, checks all sections including header, prologue, and epilogue.
 
     Returns:
@@ -216,16 +127,16 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
         section1 = schema1[section_key]
         section2 = schema2[section_key]
 
-        attribute = 'short_tag_name' if section_key == HedSectionKey.Tags else 'name'
+        name_attribute = 'short_tag_name' if section_key == HedSectionKey.Tags else 'name'
 
         # Get the name we're comparing things by
         for entry in section1.all_entries:
             if not attribute_filter or entry.has_attribute(attribute_filter):
-                dict1[getattr(entry, attribute)] = entry
+                dict1[getattr(entry, name_attribute)] = entry
 
         for entry in section2.all_entries:
             if not attribute_filter or entry.has_attribute(attribute_filter):
-                dict2[getattr(entry, attribute)] = entry
+                dict2[getattr(entry, name_attribute)] = entry
 
         # Find keys present in dict1 but not in dict2, and vice versa
         not_in_schema2[section_key] = {key: dict1[key] for key in dict1 if key not in dict2}
@@ -243,185 +154,227 @@ def compare_schemas(schema1, schema2, attribute_filter=HedKey.InLibrary, section
 
 
 def _get_tag_name_summary(tag_dicts):
+    """Combines the given dicts, so the output is section_key:list of keys"""
     out_dict = {section_key: [] for section_key in HedSectionKey}
     for tag_dict in tag_dicts:
         for section_key, section in tag_dict.items():
-            if section_key == MiscSection:
-                continue
             out_dict[section_key].extend(section.keys())
 
     return out_dict
 
 
-def _pretty_print_header(summary_dict):
-    
-    output_string = ""
-    first_entry = True
-    for section_key, tag_names in summary_dict.items():
-        if not tag_names:
-            continue
-        type_name = SectionEntryNamesPlural[section_key]
-        if not first_entry:
-            output_string += "\n"
-        output_string += f"{type_name}: "
+def _group_changes_by_section_with_unique_tags(change_dict):
+    """Similar to above, but on the patch note changes"""
+    organized_changes = defaultdict(set)
+    for change in change_dict:
+        section_key = change['section']
+        tag = change['tag']
+        organized_changes[section_key].add(tag)
+    return dict(organized_changes)
 
-        output_string += ", ".join(sorted(tag_names))
-
-        output_string += "\n"
-        first_entry = False
-    return output_string
-
-
-def _pretty_print_entry(entry):
-    """ Returns the contents of a HedSchemaEntry object as a list of strings.
-
-    Parameters:
-        entry (HedSchemaEntry): The HedSchemaEntry object to be displayed.
-
-    Returns:
-        List of strings representing the entry.
-    """
-    # Initialize the list with the name of the entry
-    output = [f"\tName: {entry.name}"]
 
-    # Add the description to the list if it exists
-    if entry.description is not None:
-        output.append(f"\tDescription: {entry.description}")
+def _sort_changes_by_severity(changes_dict):
+    """Sort the changelist by severity"""
+    for section in changes_dict.values():
+        order = {'Major': 1, 'Minor': 2, 'Patch': 3, 'Unknown': 4}
+        section.sort(key=lambda x: order.get(x['change_type'], order['Unknown']))
 
-    # Iterate over all attributes and add them to the list
-    for attr_key, attr_value in entry.attributes.items():
-        output.append(f"\tAttribute: {attr_key} - Value: {attr_value}")
 
-    return output
-
-
-def _entry_to_dict(entry):
-    """
-    Returns the contents of a HedSchemaEntry object as a dictionary.
-
-    Parameters:
-        entry (HedSchemaEntry): The HedSchemaEntry object to be displayed.
-
-    Returns:
-        Dictionary representing the entry.
-    """
-    output = {
-        "Name": entry.name,
-        "Description": entry.description,
-        "Attributes": entry.attributes
-    }
-    return output
-
-
-def _dict_diff_entries(entry1, entry2):
+def gather_schema_changes(schema1, schema2, attribute_filter=None):
     """
-    Returns the differences between two HedSchemaEntry objects as a dictionary.
+    Compare two schemas section by section, generating a changelog.
 
     Parameters:
-        entry1 (HedSchemaEntry or str): The first entry.
-        entry2 (HedSchemaEntry or str): The second entry.
+        schema1 (HedSchema): The first schema to be compared.
+        schema2 (HedSchema): The second schema to be compared.
+        attribute_filter (str, optional): The attribute to filter entries by.
+            Entries without this attribute are skipped.
+            The most common use would be HedKey.InLibrary
+            If it evaluates to False, no filtering is performed.
 
     Returns:
-        Dictionary representing the differences.
-    """
-    diff_dict = {}
-
-    if isinstance(entry1, str):
-        # Handle special case ones like prologue
-        if entry1 != entry2:
-            diff_dict["value"] = {
-                "Schema1": entry1,
-                "Schema2": entry2
-            }
-    else:
-        if entry1.name != entry2.name:
-            diff_dict["name"] = {
-                "Schema1": entry1.name,
-                "Schema2": entry2.name
-            }
-
-        # Checking if both entries have the same description
-        if entry1.description != entry2.description:
-            diff_dict["description"] = {
-                "Schema1": entry1.description,
-                "Schema2": entry2.description
-            }
-
-        # Comparing attributes
-        for attr in set(entry1.attributes.keys()).union(entry2.attributes.keys()):
-            if entry1.attributes.get(attr) != entry2.attributes.get(attr):
-                diff_dict[attr] = {
-                    "Schema1": entry1.attributes.get(attr),
-                    "Schema2": entry2.attributes.get(attr)
-                }
-
-    return diff_dict
-
-
-def _pretty_print_diff_entry(entry1, entry2):
+        changelog(dict): A dict organized by section with the changes
     """
-    Returns the differences between two HedSchemaEntry objects as a list of strings.
+    _, not_in_1, not_in_2, unequal_entries = compare_schemas(schema1, schema2, attribute_filter=attribute_filter,
+                                                             sections=None)
+    change_dict = defaultdict(list)
+
+    # Items removed from schema
+    for section_key, section in not_in_2.items():
+        for tag, _ in section.items():
+            type_name = SectionEntryNamesPlural[section_key]
+            if section_key == HedSectionKey.Tags:
+                change_dict[section_key].append(
+                    {'change_type': 'Major', 'change': f'Tag {tag} deleted from {type_name}',
+                     'tag': tag})
+            else:
+                # Only here for completeness - these aren't in the list
+                change_dict[section_key].append(
+                    {'change_type': 'Unknown', 'change': f"Item {tag} removed from {type_name}",
+                     'tag': tag})
+
+    # Items added to schema
+    for section_key, section in not_in_1.items():
+        for tag, _ in section.items():
+            type_name = SectionEntryNamesPlural[section_key]
+            change_dict[section_key].append({'change_type': 'Minor', 'change': f'Item {tag} added to {type_name}',
+                                             'tag': tag})
+
+    # Now the much more complex part: comparing the changes within an individual tag
+    for section_key, changes in unequal_entries.items():
+        if section_key == MiscSection:
+            for misc_section, (value1, value2) in changes.items():
+                # todo: consider fine grained header changes
+                change_dict[section_key].append(
+                    {'change_type': 'Patch', 'change': f'{misc_section} changed from "{value1}" to "{value2}"',
+                     'tag': misc_section})
+            continue
+        for tag, (entry1, entry2) in changes.items():
+            if section_key == HedSectionKey.UnitClasses:
+                for unit in entry1.units:
+                    if unit not in entry2.units:
+                        change_dict[section_key].append(
+                            {'change_type': 'Major', 'change': f'Unit {unit} removed from {entry1.name}',
+                             'tag': tag})
+                for unit in entry2.units:
+                    if unit not in entry1.units:
+                        change_dict[section_key].append(
+                            {'change_type': 'Patch', 'change': f'Unit {unit} added to {entry2.name}',
+                             'tag': tag})
+            if section_key == HedSectionKey.Tags:
+                for unit_class in entry1.unit_classes:
+                    if unit_class not in entry2.unit_classes:
+                        change_dict[section_key].append(
+                            {'change_type': 'Major',
+                             'change': f'Unit class {unit_class} removed from {entry1.short_tag_name}',
+                             'tag': tag})
+                for unit_class in entry2.unit_classes:
+                    if unit_class not in entry1.unit_classes:
+                        change_dict[section_key].append(
+                            {'change_type': 'Patch',
+                             'change': f'Unit class {unit_class} added to {entry2.short_tag_name}',
+                             'tag': tag})
+
+                for value_class in entry1.value_classes:
+                    if value_class not in entry2.value_classes:
+                        change_dict[section_key].append(
+                            {'change_type': 'Unknown',
+                             'change': f'Value class {value_class} removed from {entry1.short_tag_name}',
+                             'tag': tag})
+                for value_class in entry2.value_classes:
+                    if value_class not in entry1.value_classes:
+                        change_dict[section_key].append(
+                            {'change_type': 'Minor',
+                             'change': f'Value class {value_class} added to {entry2.short_tag_name}',
+                             'tag': tag})
+
+                if entry1.long_tag_name != entry2.long_tag_name:
+                    change_dict[section_key].append(
+                        {'change_type': 'Minor', 'change': f'Tag {entry1.short_tag_name} moved in schema',
+                         'tag': tag})
+
+                suggested_tag1 = sorted(entry1.inherited_attributes.get(HedKey.SuggestedTag, "").split(","))
+                suggested_tag2 = sorted(entry2.inherited_attributes.get(HedKey.SuggestedTag, "").split(","))
+                if suggested_tag1 != suggested_tag2:
+                    change_dict[section_key].append(
+                        {'change_type': 'Patch', 'change': f'Suggested tag changed on {entry1.name}',
+                         'tag': tag})
+
+                related_tag1 = sorted(entry1.inherited_attributes.get(HedKey.RelatedTag, "").split(","))
+                related_tag2 = sorted(entry2.inherited_attributes.get(HedKey.RelatedTag, "").split(","))
+                if related_tag1 != related_tag2:
+                    change_dict[section_key].append(
+                        {'change_type': 'Patch', 'change': f'Related tag changed on {entry1.name}',
+                         'tag': tag})
+
+            _check_other_attributes(entry1, entry2, tag, section_key, change_dict)
+            if entry1.description != entry2.description:
+                change_dict[section_key].append({'change_type': 'Patch', 'change': f'Description of {tag} modified',
+                                                 'tag': tag})
+
+    _sort_changes_by_severity(change_dict)
+    return change_dict
+
+
+def pretty_print_change_dict(change_dict, title="Schema changes"):
+    """Formats the change_dict into a string.
 
     Parameters:
-        entry1 (HedSchemaEntry): The first entry.
-        entry2 (HedSchemaEntry): The second entry.
+        change_dict(dict): The result from calling gather_schema_changes.
+        title(str): Optional header to add; a default one will be used otherwise.
 
     Returns:
-        List of strings representing the differences.
+        changelog(str): the changes listed out by section
     """
-    diff_dict = _dict_diff_entries(entry1, entry2)
-    diff_lines = []
-
-    for key, value in diff_dict.items():
-        diff_lines.append(f"\t{key}:")
-        for schema, val in value.items():
-            diff_lines.append(f"\t\t{schema}: {val}")
+    final_strings = []
+    if change_dict:
+        final_strings.append(title)
+        for section_key, section_dict in change_dict.items():
+            name = SectionEntryNamesPlural.get(section_key, section_key)
+            final_strings.append(f"{name}:")
+            for item in section_dict:
+                change, tag, change_type = item['change'], item['tag'], item['change_type']
+                final_strings.append(f"\t{tag} ({change_type}): {change}")
+    return "\n".join(final_strings)
 
-    return diff_lines
 
-
-def _pretty_print_diff_all(entries, type_name=""):
-    """
-    Formats the differences between pairs of HedSchemaEntry objects.
+def compare_differences(schema1, schema2, attribute_filter=None, title=""):
+    """Compare the tags in two schemas, this finds any differences
 
     Parameters:
-        entries (dict): A dictionary where each key maps to a pair of HedSchemaEntry objects.
-        type_name(str): The type to identify this as, such as Tag
+        schema1 (HedSchema): The first schema to be compared.
+        schema2 (HedSchema): The second schema to be compared.
+        attribute_filter (str, optional): The attribute to filter entries by.
+                                          Entries without this attribute are skipped.
+                                          The most common use would be HedKey.InLibrary
+                                          If it evaluates to False, no filtering is performed.
+        title(str): Optional header to add; a default one will be generated otherwise.
+
     Returns:
-        diff_string(str): The differences found in the dict
+        changelog(str): the changes listed out by section
     """
-    output = []
-    if not type_name.endswith(" "):
-        type_name += " "
-    if not entries:
-        return ""
-    for key, (entry1, entry2) in entries.items():
-        output.append(f"{type_name}'{key}':")
-        output += _pretty_print_diff_entry(entry1, entry2)
-        output.append("")
+    changelog = gather_schema_changes(schema1, schema2, attribute_filter=attribute_filter)
+    if not title:
+        title = f"Differences between {schema1.name} and {schema2.name}"
+    changelog_string = pretty_print_change_dict(changelog, title=title)
 
-    return "\n".join(output)
+    return changelog_string
 
 
-def _pretty_print_missing_all(entries, schema_name, type_name):
-    """
-    Formats the missing entries from schema_name.
-
-    Parameters:
-        entries (dict): A dictionary where each key maps to a pair of HedSchemaEntry objects.
-        schema_name(str): The name these entries are missing from
-        type_name(str): The type to identify this as, such as Tag
-    Returns:
-        diff_string(str): The differences found in the dict
-    """
-    output = []
-    if not entries:
-        return ""
-    if not type_name.endswith(" "):
-        type_name += " "
-    for key, entry in entries.items():
-        output.append(f"{type_name}'{key}' not in '{schema_name}':")
-        output += _pretty_print_entry(entry)
-        output.append("")
-
-    return "\n".join(output)
+def _check_other_attributes(entry1, entry2, tag, section_key, change_dict):
+    """Compare non-specialized attributes (those not already handled individually)."""
+    already_checked_attributes = [HedKey.RelatedTag, HedKey.SuggestedTag, HedKey.ValueClass, HedKey.UnitClass]
+    unique_keys = set(entry1.attributes.keys()).union(entry2.attributes.keys())
+    if section_key == HedSectionKey.Tags:
+        unique_inherited_keys = set(entry1.inherited_attributes.keys()).union(entry2.inherited_attributes.keys())
+    else:
+        unique_inherited_keys = unique_keys
+    # Combine unique keys from both attributes and inherited attributes, then remove already checked attributes
+    all_unique_keys = unique_keys.union(unique_inherited_keys).difference(already_checked_attributes)
+
+    for key in all_unique_keys:
+        is_inherited = key in unique_inherited_keys
+        is_direct = key in unique_keys
+
+        if section_key == HedSectionKey.Tags:
+            value1 = entry1.inherited_attributes.get(key)
+            value2 = entry2.inherited_attributes.get(key)
+        else:
+            value1 = entry1.attributes.get(key)
+            value2 = entry2.attributes.get(key)
+
+        if value1 != value2:
+            if is_inherited and not is_direct:
+                change_dict[section_key].append({
+                    "change_type": "Minor",
+                    "change": f"Inherited attribute '{key}' modified from '{value1}' to '{value2}'",
+                    "tag": tag,
+                    "section": section_key
+                })
+            else:
+                change_dict[section_key].append({
+                    "change_type": "Patch",
+                    "change": f"Attribute '{key}' modified from '{value1}' to '{value2}'",
+                    "tag": tag,
+                    "section": section_key
+                })
diff --git a/hed/schema/schema_io/base2schema.py b/hed/schema/schema_io/base2schema.py
index 2f48775c..bc193eaf 100644
--- a/hed/schema/schema_io/base2schema.py
+++ b/hed/schema/schema_io/base2schema.py
@@ -114,6 +114,7 @@ def _load(self):
             # Copy the non-alterable cached schema
             self._schema = copy.deepcopy(base_version)
             self._schema.filename = self.filename
+            self._schema.name = self.name  # Manually set name here as we don't want to pass it to load_schema_version
             self._schema.header_attributes = saved_attr
             self._loading_merged = False
 
diff --git a/tests/data/schema_tests/schema_compare.mediawiki b/tests/data/schema_tests/schema_compare.mediawiki
new file mode 100644
index 00000000..397f26d8
--- /dev/null
+++ b/tests/data/schema_tests/schema_compare.mediawiki
@@ -0,0 +1,51 @@
+HED version="1.1.0" library="compare" withStandard="8.2.0" unmerged="True"
+
+'''Prologue'''
+
+!# start schema
+
+'''Tag1'''
+* Moving-tag
+* Moving-tag-changed
+* Deleting-tag
+* RemovingUnitClass {takesValue}
+** # {unitClass=accelerationUnits, unitClass=physicalLengthUnits}
+* AddingUnitClass {takesValue}
+** # {unitClass=physicalLengthUnits}
+
+
+'''Tag2'''
+* RelatedTagChanging {relatedTag=Tag1, relatedTag=Tag2}
+** InheritedRelatedTagChanging
+* SuggestedTagChanging {suggestedTag=Tag1, suggestedTag=Tag2}
+* RemovingValueClass {takesValue}
+** # {valueClass=numericClass, valueClass=nameClass}
+* AddingValueClass {takesValue}
+** # {valueClass=numericClass}
+
+'''Tag3'''
+* DescriptionChanging [TagDescription]
+
+'''Tag4'''{extensionAllowed}
+* ExtensionAllowedRemoval1
+* ExtensionAllowedRemoval2
+
+!# end schema
+
+'''Unit classes'''
+* addingUnit
+** unit1
+* removingUnit
+** unit3
+** unit4
+'''Unit modifiers'''
+
+'''Value classes'''
+
+'''Schema attributes'''
+
+'''Properties'''
+'''Epilogue'''
+Epilogue Removed
+
+!# end hed
diff --git a/tests/data/schema_tests/schema_compare2.mediawiki b/tests/data/schema_tests/schema_compare2.mediawiki
new file mode 100644
index 00000000..2747d72b
--- /dev/null
+++ b/tests/data/schema_tests/schema_compare2.mediawiki
@@ -0,0 +1,62 @@
+HED version="1.2.0" library="compare" withStandard="8.2.0" unmerged="True"
+
+'''Prologue'''
+Prologue Added
+
+!# start schema
+
+'''Tag1'''
+* RemovingUnitClass {takesValue}
+** # {unitClass=accelerationUnits}
+* AddingUnitClass {takesValue}
+** # {unitClass=accelerationUnits, unitClass=physicalLengthUnits}
+
+'''Tag2'''
+* Moving-tag
+* Moving-tag-changed {requireChild}
+** AddedTag
+
+* RemovingValueClass {takesValue}
+** # {valueClass=numericClass}
+* AddingValueClass {takesValue}
+** # {valueClass=numericClass, valueClass=nameClass}
+
+* RelatedTagChanging {relatedTag=Tag1}
+** InheritedRelatedTagChanging
+* SuggestedTagChanging {suggestedTag=Tag2}
+
+'''Tag3'''
+* DescriptionChanging [TagDescriptionNew]
+
+'''Tag4'''
+* ExtensionAllowedRemoval1
+
+'''Tag5'''
+* ExtensionAllowedRemoval2
+
+!# end schema
+
+'''Unit classes'''
+* addingUnit
+** unit1
+** unit2
+* removingUnit
+** unit4
+* NewUnitClass
+
+'''Unit modifiers'''
+* NewUnitModifier
+
+'''Value classes'''
+* NewValueClass
+
+'''Schema attributes'''
+* NewAttribute
+
+'''Properties'''
+* NewProperty
+
+'''Epilogue'''
+
+
+!# end hed
diff --git a/tests/models/test_basic_search.py b/tests/models/test_basic_search.py
index 519c9bae..0124a0f5 100644
--- a/tests/models/test_basic_search.py
+++ b/tests/models/test_basic_search.py
@@ -8,7 +8,7 @@
 from hed.models.basic_search import find_words, check_parentheses, reverse_and_flip_parentheses, \
     construct_delimiter_map, verify_search_delimiters, find_matching
 import numpy as np
-
+from hed.models.df_util import convert_to_form
 
 class TestNewSearch(unittest.TestCase):
     @classmethod
@@ -26,11 +26,34 @@ def test_find_matching_results(self):
         result1 = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
         result2 = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
 
-        # Add assertions
         self.assertTrue(np.sum(result1) > 0, "result1 should have some true values")
         self.assertTrue(np.sum(result2) > 0, "result2 should have some true values")
         self.assertTrue(np.sum(result1) < np.sum(result2), "result1 should have fewer true values than result2")
 
+        # Verify we get the same results in both tag forms
+        df_copy = self.df.copy()
+        convert_to_form(df_copy, self.schema, "long_tag")
+
+        result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
+        result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
+
+        self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values")
+        self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values")
+        self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2")
+        self.assertTrue(result1.equals(result1b))
+        self.assertTrue(result2.equals(result2b))
+
+        convert_to_form(df_copy, self.schema, "short_tag")
+
+        result1b = basic_search.find_matching(self.df, "(Face, Item-interval/1)")
+        result2b = basic_search.find_matching(self.df, "(Face, Item-interval/1*)")
+
+        self.assertTrue(np.sum(result1b) > 0, "result1 should have some true values")
+        self.assertTrue(np.sum(result2b) > 0, "result2 should have some true values")
+        self.assertTrue(np.sum(result1b) < np.sum(result2b), "result1 should have fewer true values than result2")
+        self.assertTrue(result1.equals(result1b))
+        self.assertTrue(result2.equals(result2b))
+
 
 class TestFindWords(unittest.TestCase):
     def test_basic(self):
diff --git a/tests/schema/test_schema_compare.py b/tests/schema/test_schema_compare.py
index f6b1ceed..31302c66 100644
--- a/tests/schema/test_schema_compare.py
+++ b/tests/schema/test_schema_compare.py
@@ -1,21 +1,27 @@
 import unittest
 import json
+import copy
+
 
 from hed.schema import HedKey, HedSectionKey
-from hed.schema.schema_compare import compare_schemas, find_matching_tags, \
-    _pretty_print_diff_all, _pretty_print_missing_all, compare_differences
-from hed import load_schema_version
+from hed.schema.schema_compare import compare_schemas
+from hed.schema.schema_compare import gather_schema_changes, find_matching_tags, pretty_print_change_dict, compare_differences
+from hed import load_schema_version, load_schema
 
 from . import util_create_schemas
-
+import os
 
 class TestSchemaComparison(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):  # stores the schema test-data directory (relative to this file) as cls.base_data
+        cls.base_data = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/schema_tests/')
+
     def test_find_matching_tags(self):
         # create entries for schema1
         schema1 = util_create_schemas.load_schema1()
         schema2 = util_create_schemas.load_schema2()
 
-        result = find_matching_tags(schema1, schema2)
+        result = find_matching_tags(schema1, schema2, return_string=False)
         # Check if the result is correct
         self.assertEqual(len(result[HedSectionKey.Tags]), 3)
         self.assertIn("TestNode", result[HedSectionKey.Tags])
@@ -24,27 +30,9 @@ def test_find_matching_tags(self):
         self.assertNotIn("TestNode4", result[HedSectionKey.Tags])
         self.assertNotIn("TestNode5", result[HedSectionKey.Tags])
 
-        # Test with include_summary=True
-        match_string = find_matching_tags(schema1, schema2, output='string', include_summary=True)
+        match_string = find_matching_tags(schema1, schema2)
         self.assertIsInstance(match_string, str)
         self.assertIn("Tags:", match_string)
-        # print(match_string)
-
-        json_style_dict = find_matching_tags(schema1, schema2, output='dict', include_summary=True)
-        self.assertIsInstance(json_style_dict, dict)
-        self.assertIn("summary", json_style_dict)
-
-        result_string = json.dumps(json_style_dict, indent=4)
-        self.assertIsInstance(result_string, str)
-
-        # Optionally, you can also test the case without include_summary
-        match_string_no_summary = find_matching_tags(schema1, schema2, output='string', include_summary=False)
-        self.assertIsInstance(match_string_no_summary, str)
-        self.assertNotIn("Tags:", match_string_no_summary)
-
-        json_style_dict_no_summary = find_matching_tags(schema1, schema2, output='dict', include_summary=False)
-        self.assertIsInstance(json_style_dict_no_summary, dict)
-        self.assertNotIn("summary", json_style_dict_no_summary)
 
     def test_compare_schemas(self):
         schema1 = util_create_schemas.load_schema1()
@@ -67,11 +55,22 @@ def test_compare_schemas(self):
         self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 1)  # No unequal entries should be found
         self.assertIn("TestNode3", unequal_entries[HedSectionKey.Tags])
 
+    def test_compare_and_summarize_schemas_test(self):
+        """Check the change count for the two comparison schemas, both raw and in the printed summary."""
+        first = load_schema(os.path.join(self.base_data, "schema_compare.mediawiki"), name="Schema1")
+        second = load_schema(os.path.join(self.base_data, "schema_compare2.mediawiki"), name="Schema2")
+        changes = gather_schema_changes(first, second)
+        self.assertEqual(sum(map(len, changes.values())), 30)
+
+        summary = pretty_print_change_dict(changes, title=f"Differences between {first.name} and {second.name}")
+        # this test may need updating if the text format changes
+        self.assertEqual(summary.count("):"), 30)
+
     def test_compare_differences(self):
         schema1 = util_create_schemas.load_schema1()
         schema2 = util_create_schemas.load_schema2()
 
-        not_in_schema1, not_in_schema2, unequal_entries = compare_differences(schema1, schema2)
+        _, not_in_schema1, not_in_schema2, unequal_entries = compare_schemas(schema1, schema2)
 
         self.assertEqual(len(not_in_schema2[HedSectionKey.Tags]), 1)  # One tag not in schema2
         self.assertIn("TestNode4", not_in_schema2[HedSectionKey.Tags])  # "TestNode4" is not in schema2
@@ -82,49 +81,32 @@ def test_compare_differences(self):
         self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 1)  # No unequal entries should be found
         self.assertIn("TestNode3", unequal_entries[HedSectionKey.Tags])
 
-        # Test with include_summary=True, string output
-        diff_string_with_summary = compare_differences(schema1, schema2, output='string', include_summary=True)
+        diff_string_with_summary = compare_differences(schema1, schema2)
         self.assertIsInstance(diff_string_with_summary, str)
         self.assertIn("Tags:", diff_string_with_summary)
-        # print(diff_string_with_summary)
-
-        # Test with include_summary=True, dict output
-        diff_dict_with_summary = compare_differences(schema1, schema2, output='dict', include_summary=True)
-        self.assertIsInstance(diff_dict_with_summary, dict)
-        self.assertIn("summary", diff_dict_with_summary)
-
-        # Optionally, test without include_summary, string output
-        diff_string_no_summary = compare_differences(schema1, schema2, output='string', include_summary=False)
-        self.assertIsInstance(diff_string_no_summary, str)
-        self.assertNotIn("Tags:", diff_string_no_summary)
-
-        # Optionally, test without include_summary, dict output
-        diff_dict_no_summary = compare_differences(schema1, schema2, output='dict', include_summary=False)
-        self.assertIsInstance(diff_dict_no_summary, dict)
-        self.assertNotIn("summary", diff_dict_no_summary)
 
     def test_compare_score_lib_versions(self):
         schema1 = load_schema_version("score_1.0.0")
         schema2 = load_schema_version("score_1.1.0")
-        not_in_schema1, not_in_schema2, unequal_entries = compare_differences(schema1, schema2,
-                                                                              attribute_filter=HedKey.InLibrary)
- 
-        
+        _, not_in_schema1, not_in_schema2, unequal_entries = compare_schemas(schema1, schema2,
+                                                                             attribute_filter=HedKey.InLibrary)
+
         self.assertEqual(len(not_in_schema1[HedSectionKey.Tags]), 21)
         self.assertEqual(len(not_in_schema2[HedSectionKey.Tags]), 10)
-        self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 61)
-
-        diff_string = compare_differences(schema1, schema1, attribute_filter=HedKey.InLibrary, output='string',
-                                          sections=None)
+        self.assertEqual(len(unequal_entries[HedSectionKey.Tags]), 80)
+
+        diff_string = compare_differences(schema1, schema2, attribute_filter=HedKey.InLibrary)
+        # Do a half-hearted check that all the above showed up in the output
+        self.assertTrue(diff_string)
+        for item in not_in_schema1[HedSectionKey.Tags].keys():
+            self.assertIn(item, diff_string)
+        for item in not_in_schema2[HedSectionKey.Tags].keys():
+            self.assertIn(item, diff_string)
+        for item in unequal_entries[HedSectionKey.Tags].keys():
+            self.assertIn(item, diff_string)
+
+    def test_compare_identical_schemas(self):
+        original = load_schema_version("score_1.0.0")
+        duplicate = copy.deepcopy(original)  # independent copy, so no differences are expected
+        diff_string = compare_differences(original, duplicate, attribute_filter=HedKey.InLibrary)
         self.assertFalse(diff_string)
-        diff_string = compare_differences(schema1, schema2, attribute_filter=HedKey.InLibrary, output='string',
-                                          sections=None)
-
-        self.assertIsInstance(diff_string, str)
-
-        json_style_dict = compare_differences(schema1, schema2, attribute_filter=HedKey.InLibrary, output='dict',
-                                              sections=None)
-        self.assertIsInstance(json_style_dict, dict)
-
-        result_string = json.dumps(json_style_dict, indent=4)
-        self.assertIsInstance(result_string, str)