diff --git a/mmtbx/geometry_restraints/geo_file_parsing.py b/mmtbx/geometry_restraints/geo_file_parsing.py index fa89347e86..c9e5e36cfb 100644 --- a/mmtbx/geometry_restraints/geo_file_parsing.py +++ b/mmtbx/geometry_restraints/geo_file_parsing.py @@ -29,7 +29,7 @@ class Entry: """ name = None # Name or label for the class - def __init__(self,lines,origin_id=0): + def __init__(self,lines,origin_id=0,origin_label="covalent"): """ An entry is initialized first, then data is added with entry.lines.append() @@ -41,8 +41,8 @@ def __init__(self,lines,origin_id=0): self.i_seqs = [] # list of integer i_seqs (if possible) self.atom_labels = [] # list of string atom label from .geo self._numerical = None # a dict of numerical geo data - self.labels_are_i_seqs = None # boolean, atom labels are i_seqs self.origin_id = origin_id + self.origin_label = origin_label # Initialize result data structures self._proxy = None @@ -51,8 +51,17 @@ def __init__(self,lines,origin_id=0): self._prepare() # Check if labels are i_seqs (integers) - self.labels_are_i_seqs, self.i_seqs = self._check_labels_are_i_seqs(self.atom_labels) + labels_are_i_seqs, self.i_seqs = self._check_labels_are_i_seqs(self.atom_labels) + if labels_are_i_seqs: + self.atom_labels = [] + @property + def labels_are_available(self): + return len(self.atom_labels)>0 + @property + def i_seqs_are_available(self): + return len(self.i_seqs)>0 + def _prepare(self): # Parse Atom labels values = [] @@ -84,10 +93,6 @@ def _check_labels_are_i_seqs(self,atom_labels): i_seqs = [] return check, i_seqs - @property - def has_i_seqs(self): - return len(self.i_seqs)>0 - @property def record(self): """ @@ -126,7 +131,7 @@ def proxy(self): """ Only create a proxy object if necessary, and if so only do it once """ - if not self._proxy and self.has_i_seqs: + if not self._proxy and self.i_seqs_are_available: self._proxy = self.to_proxy() return self._proxy @@ -290,7 +295,10 @@ def __init__(self,*args,**kwargs): self.atom_labels_i = [] self.atom_labels_j = [] super().__init__(*args,**kwargs) - self.labels_are_i_seqs, self.i_seqs = self._check_labels_are_i_seqs(self.atom_labels) + labels_are_i_seqs, self.i_seqs = self._check_labels_are_i_seqs(self.atom_labels) + if labels_are_i_seqs: + self.atom_labels_i = [] + self.atom_labels_j = [] self.i_seqs, self.j_seqs = self.i_seqs # unpack tuple @@ -446,7 +454,7 @@ class GeoParser: """ - def __init__(self,geo_lines,model=None,entry_config=None,strict_counts=True): + def __init__(self,geo_lines,model=None,entry_config=None): """ Initialize with a list of Entry subclasses @@ -454,18 +462,14 @@ def __init__(self,geo_lines,model=None,entry_config=None,strict_counts=True): geo_lines (list): List of line strings from .geo file model (mmtbx.model.manager): Optional model file, a source of atom_label: i_seq matching. entry_config (dict): Configuration dict - strict_counts (bool): Whether or not to enforce # of entries consistent with .geo header """ # Set initial arguments self.entry_config = (entry_config if entry_config else entry_config_default) self.model = model - self.strict_counts = strict_counts # Initialize parsing variables self.lines = geo_lines + ["\n"] - self.expected_counts = defaultdict(int) - self.actual_counts = defaultdict(int) # Initialize result variables self.entries = defaultdict(list) # Entry instances @@ -474,8 +478,6 @@ def __init__(self,geo_lines,model=None,entry_config=None,strict_counts=True): # Parse the file self._parse() - if self.strict_counts: - self._validate_counts() # If model present, add i_seqs if self.model: @@ -489,11 +491,15 @@ def proxies(self): def has_proxies(self): return self.proxies is not None + @property + def i_seqs_are_available(self): + return all([entry.i_seqs_are_available for entry in self.entries_list]) + def _fill_labels_from_model(self, model): """ Add i_seq attributes for each atom on each entry """ - if not self.labels_are_i_seqs: + if not self.i_seqs_are_available: # make i_seq:id_str mapping map_idstr_to_iseq = { atom.id_str():atom.i_seq for atom in model.get_atoms()} @@ -537,9 +543,11 @@ def _end_entry(self, entry_start_line_number, i, entries_info): lines_for_entry = self.lines[entry_start_line_number:i] new_entry = entries_info.entry_class( lines=lines_for_entry, - origin_id=entries_info.origin_id) + origin_id=entries_info.origin_id, + origin_label=entries_info.origin_label, + ) # add new_entry to somewhere - self.entries[entries_info.entry_class.name].append(new_entry) + self.entries[entries_info.origin_header].append(new_entry) return i def _parse(self): @@ -553,16 +561,16 @@ def _parse(self): result = origin_ids.get_origin_label_and_internal(l) if result: # if recognized as header, unpack result, store in entries_info - origin_id, header, _, num = result + origin_id, header, label, num = result entry_class = self.entry_config[header]["entry_class"] entry_trigger = self.entry_config[header]["entry_trigger"] entries_info = group_args( entry_class = entry_class, origin_id = origin_id, + origin_label = label, + origin_header = header, entry_trigger = entry_trigger, ) - info_key = str((entry_class.name,origin_id)) - self.expected_counts[info_key] = num entry_start_line_number = -1 elif l.startswith("Sorted by"): @@ -578,29 +586,6 @@ def _parse(self): # entry continues, do nothing pass - def _validate_counts(self): - """ - Verify numbers match .geo headers, fail assertion otherwise. - - If failed, will print expected and actual counts. - """ - - # Populate actual counts - for header_name,entries in self.entries.items(): - for entry in entries: - info_key = str((header_name, entry.origin_id)) - self.actual_counts[info_key]+=1 - - if not self.expected_counts == self.actual_counts: - print("Validate counts error:") - print() - print("Expected counts:") - print(json.dumps(self.expected_counts,indent=2)) - print() - print("Actual counts:") - print(json.dumps(self.actual_counts,indent=2)) - assert False, "Expected and actual counts for (restraint header name, origin_id) pairs do not match." - @property def records(self): """ @@ -616,23 +601,20 @@ def records(self): self._records = record_dict return self._records - @property - def labels_are_i_seqs(self): - return all([entry.labels_are_i_seqs for entry in self.entries_list]) def build_proxies(self): """ Convert the entry objects to cctbx proxy objects. Collect into a dict of lists of proxies. """ - if not self.model and not self.labels_are_i_seqs: + if not self.model and not self.i_seqs_are_available: raise Sorry("Cannot build proxies without instantiating with a model.") self._proxies = defaultdict(list) for entries in self.entries.values(): if len(entries)>0: entry_class = entries[0].__class__ if hasattr(entry_class,"to_proxy") and not hasattr(entry_class,"ignore"): - self._proxies[entry_class.name] = [entry.proxy for entry in entries] + self._proxies[entry_class.__name__] = [entry.proxy for entry in entries] return self.proxies diff --git a/mmtbx/geometry_restraints/tst_geo_file_parsing.py b/mmtbx/geometry_restraints/tst_geo_file_parsing.py index 123bacc931..5d48f9f2b8 100644 --- a/mmtbx/geometry_restraints/tst_geo_file_parsing.py +++ b/mmtbx/geometry_restraints/tst_geo_file_parsing.py @@ -29,7 +29,7 @@ # access entries -entry = geo_container.entries["dihedral"][0] +entry = geo_container.entries["Dihedral angle"][0] # access entry as a pure dict entry.record @@ -417,7 +417,7 @@ # 1yjp .geo text (with labels), write the first entry of each type to json result_json_1 = """ { - "bond": [ + "Bond": [ { "i_seqs": [ 0, @@ -436,7 +436,7 @@ "origin_id": 0 } ], - "angle": [ + "Bond angle": [ { "i_seqs": [ 12, @@ -457,7 +457,7 @@ "origin_id": 0 } ], - "dihedral": [ + "Dihedral angle": [ { "i_seqs": [ 13, @@ -481,7 +481,7 @@ "origin_id": 0 } ], - "chirality": [ + "Chirality": [ { "i_seqs": [ 30, @@ -505,7 +505,7 @@ "origin_id": 0 } ], - "plane": [ + "Planarity": [ { "i_seqs": [ 50, @@ -580,7 +580,7 @@ "origin_id": 0 } ], - "nonbonded": [ + "Nonbonded": [ { "i_seqs": [ 57, @@ -601,16 +601,13 @@ # 1yjp .geo text (without labels), write the first entry of each type to json result_json_2 = """ { - "bond": [ + "Bond": [ { "i_seqs": [ 0, 1 ], - "atom_labels": [ - "0", - "1" - ], + "atom_labels": [], "ideal": 1.451, "model": 1.507, "delta": -0.056, @@ -620,18 +617,14 @@ "origin_id": 0 } ], - "angle": [ + "Bond angle": [ { "i_seqs": [ 12, 13, 14 ], - "atom_labels": [ - "12", - "13", - "14" - ], + "atom_labels": [], "ideal": 108.9, "model": 113.48, "delta": -4.58, @@ -641,7 +634,7 @@ "origin_id": 0 } ], - "dihedral": [ + "Dihedral angle": [ { "i_seqs": [ 13, @@ -649,12 +642,7 @@ 20, 21 ], - "atom_labels": [ - "13", - "14", - "20", - "21" - ], + "atom_labels": [], "ideal": 180.0, "model": 166.21, "delta": 13.79, @@ -665,7 +653,7 @@ "origin_id": 0 } ], - "chirality": [ + "Chirality": [ { "i_seqs": [ 30, @@ -673,12 +661,7 @@ 31, 33 ], - "atom_labels": [ - "30", - "29", - "31", - "33" - ], + "atom_labels": [], "both_signs": "False", "ideal": 2.51, "model": 2.39, @@ -689,7 +672,7 @@ "origin_id": 0 } ], - "plane": [ + "Planarity": [ { "i_seqs": [ 50, @@ -701,16 +684,7 @@ 56, 57 ], - "atom_labels": [ - "50", - "51", - "52", - "53", - "54", - "55", - "56", - "57" - ], + "atom_labels": [], "delta": [ -0.006, 0.022, @@ -764,16 +738,13 @@ "origin_id": 0 } ], - "nonbonded": [ + "Nonbonded": [ { "i_seqs": [ 57, 62 ], - "atom_labels": [ - "57", - "62" - ], + "atom_labels": [], "model": 2.525, "vdw": 3.04, "sym.op.": "-x+1,y-1/2,-z+1", @@ -785,7 +756,7 @@ # 1yjp .geo text (with labels but no model), So no i_seqs possible result_json_3 = """ { - "bond": [ + "Bond": [ { "i_seqs": [], "atom_labels": [ @@ -801,7 +772,7 @@ "origin_id": 0 } ], - "angle": [ + "Bond angle": [ { "i_seqs": [], "atom_labels": [ @@ -818,7 +789,7 @@ "origin_id": 0 } ], - "dihedral": [ + "Dihedral angle": [ { "i_seqs": [], "atom_labels": [ @@ -837,7 +808,7 @@ "origin_id": 0 } ], - "chirality": [ + "Chirality": [ { "i_seqs": [], "atom_labels": [ @@ -856,7 +827,7 @@ "origin_id": 0 } ], - "plane": [ + "Planarity": [ { "i_seqs": [], "atom_labels": [ @@ -922,7 +893,7 @@ "origin_id": 0 } ], - "nonbonded": [ + "Nonbonded": [ { "i_seqs": [], "atom_labels": [ @@ -940,7 +911,7 @@ # tst_1_geo parsed (testing multiple types of origin ids), the first entry for each entry type. result_json_4 = """ { - "bond": [ + "Bond": [ { "i_seqs": [], "atom_labels": [ @@ -956,7 +927,7 @@ "origin_id": 0 } ], - "angle": [ + "Bond angle": [ { "i_seqs": [], "atom_labels": [ @@ -973,7 +944,7 @@ "origin_id": 0 } ], - "dihedral": [ + "Dihedral angle": [ { "i_seqs": [], "atom_labels": [ @@ -992,7 +963,7 @@ "origin_id": 0 } ], - "chirality": [ + "Chirality": [ { "i_seqs": [], "atom_labels": [ @@ -1011,7 +982,7 @@ "origin_id": 0 } ], - "plane": [ + "Planarity": [ { "i_seqs": [], "atom_labels": [ @@ -1083,7 +1054,7 @@ "origin_id": 0 } ], - "nonbonded": [ + "Nonbonded": [ { "i_seqs": [], "atom_labels": [ @@ -1095,7 +1066,7 @@ "origin_id": 0 } ], - "parallelity": [ + "Parallelity": [ { "i_seqs": [], "j_seqs": [], @@ -1136,16 +1107,13 @@ # Tests ability to read atom labels in varied forms (id_str, i_seq) result_json_5 = """ { - "bond": [ + "Bond": [ { "i_seqs": [ 0, 1 ], - "atom_labels": [ - "0", - "1" - ], + "atom_labels": [], "ideal": 1.48, "model": 1.481, "delta": -0.001, @@ -1155,18 +1123,14 @@ "origin_id": 0 } ], - "angle": [ + "Bond angle": [ { "i_seqs": [ 26, 27, 28 ], - "atom_labels": [ - "26", - "27", - "28" - ], + "atom_labels": [], "ideal": 112.0, "model": 112.26, "delta": -0.26, @@ -1176,7 +1140,7 @@ "origin_id": 0 } ], - "dihedral": [ + "Dihedral angle": [ { "i_seqs": [ 25, @@ -1184,12 +1148,7 @@ 27, 28 ], - "atom_labels": [ - "25", - "26", - "27", - "28" - ], + "atom_labels": [], "ideal": -106.54, "model": -179.87, "delta": 73.33, @@ -1200,7 +1159,7 @@ "origin_id": 0 } ], - "chirality": [ + "Chirality": [ { "i_seqs": [ 10, @@ -1208,12 +1167,7 @@ 12, 13 ], - "atom_labels": [ - "10", - "11", - "12", - "13" - ], + "atom_labels": [], "both_signs": "False", "ideal": 2.47, "model": 2.45, @@ -1224,7 +1178,7 @@ "origin_id": 0 } ], - "plane": [ + "Planarity": [ { "i_seqs": [ 3, @@ -1237,17 +1191,7 @@ 10, 11 ], - "atom_labels": [ - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "10", - "11" - ], + "atom_labels": [], "delta": [ -0.001, 0.002, @@ -1306,22 +1250,19 @@ "origin_id": 0 } ], - "nonbonded": [ + "Nonbonded": [ { "i_seqs": [ 21, 22 ], - "atom_labels": [ - "21", - "22" - ], + "atom_labels": [], "model": 2.683, "vdw": 2.672, "origin_id": 0 } ], - "parallelity": [ + "Parallelity": [ { "i_seqs": [ 23, @@ -1348,31 +1289,8 @@ 7, 9 ], - "atom_labels_i": [ - "23", - "25", - "27", - "29", - "0", - "2", - "4", - "6", - "8", - "10", - "11", - "12" - ], - "atom_labels_j": [ - "24", - "26", - "28", - "30", - "1", - "3", - "5", - "7", - "9" - ], + "atom_labels_i": [], + "atom_labels_j": [], "residual": 6.47, "delta(deg)": 5.5671, "sigma": 0.027, @@ -1455,7 +1373,7 @@ def tst_01(model,printing=False): if not geo_container.has_proxies: geo_container.build_proxies() assert geo_container.has_proxies - assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["nonbonded"]) + assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["Nonbonded"]) def tst_02(model,printing=False): # Test a 1yjp with NO labels and YES a model @@ -1488,7 +1406,7 @@ def tst_02(model,printing=False): if not geo_container.has_proxies: geo_container.build_proxies() assert geo_container.has_proxies - assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["nonbonded"]) + assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["Nonbonded"]) def tst_03(model,printing=False): # Test a 1yjp with NO labels and NO a model @@ -1522,7 +1440,7 @@ def tst_03(model,printing=False): if not geo_container.has_proxies: geo_container.build_proxies() assert geo_container.has_proxies - assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["nonbonded"]) + assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["Nonbonded"]) def tst_04(model,printing=False): # Test a 1yjp with YES labels and NO a model @@ -1583,8 +1501,9 @@ def tst_05(model,printing=False): assert len(entries) == 39 assert not geo_container.has_proxies - origin_ids = [entry.origin_id for entry in geo_container.entries_list] - assert origin_ids ==[0, 9, 18, 1, 53, 2, 3, 73, 5, 22, 20, 4, 21, 0, 18, 1, 53, 2, 73, 5, 22, 20, 21, 0, 81, 82, 73, 0, 18, 53, 22, 20, 21, 0, 53, 73, 0, 6, 7], "Got:" + str(origin_ids) + origins = [entry.origin_label for entry in geo_container.entries_list] + expected_origins = ['covalent geometry', 'Misc.', 'link_ALPHA2-6', 'Disulphide bridge', 'link_NAG-ASN', '-like', 'Metal coordination', 'link_TRANS', 'Custom Glycosidic', 'link_BETA1-6', 'link_BETA1-3', 'User supplied', 'link_BETA1-4', 'covalent geometry', 'link_ALPHA2-6', 'Disulphide bridge', 'link_NAG-ASN', 'Secondary Structure restraints around h-bond', 'link_TRANS', 'Custom Glycosidic', 'link_BETA1-6', 'link_BETA1-3', 'link_BETA1-4', 'covalent geometry', 'C-Beta improper', 'Side chain', 'link_TRANS', 'covalent geometry', 'link_ALPHA2-6', 'link_NAG-ASN', 'link_BETA1-6', 'link_BETA1-3', 'link_BETA1-4', 'covalent geometry', 'link_NAG-ASN', 'link_TRANS', '', 'Stacking parallelity', 'Basepair parallelity'] + assert origins ==expected_origins, "Got:" + str(origins) def tst_06(model,printing=False): # Test reading complicated geo file @@ -1614,10 +1533,11 @@ def tst_06(model,printing=False): if not geo_container.has_proxies: geo_container.build_proxies() assert geo_container.has_proxies - assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["nonbonded"]) + assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["Nonbonded"]) - origin_ids = [entry.origin_id for entry in geo_container.entries_list] - assert origin_ids ==[0, 9, 18, 1, 53, 2, 3, 73, 5, 22, 20, 4, 21, 0, 18, 1, 53, 2, 73, 5, 22, 20, 21, 0, 81, 82, 73, 0, 18, 53, 22, 20, 21, 0, 53, 73, 0, 6, 7], "Got: "+ str(origin_ids) + origins = [entry.origin_label for entry in geo_container.entries_list] + expected_origins = ['covalent geometry', 'Misc.', 'link_ALPHA2-6', 'Disulphide bridge', 'link_NAG-ASN', '-like', 'Metal coordination', 'link_TRANS', 'Custom Glycosidic', 'link_BETA1-6', 'link_BETA1-3', 'User supplied', 'link_BETA1-4', 'covalent geometry', 'link_ALPHA2-6', 'Disulphide bridge', 'link_NAG-ASN', 'Secondary Structure restraints around h-bond', 'link_TRANS', 'Custom Glycosidic', 'link_BETA1-6', 'link_BETA1-3', 'link_BETA1-4', 'covalent geometry', 'C-Beta improper', 'Side chain', 'link_TRANS', 'covalent geometry', 'link_ALPHA2-6', 'link_NAG-ASN', 'link_BETA1-6', 'link_BETA1-3', 'link_BETA1-4', 'covalent geometry', 'link_NAG-ASN', 'link_TRANS', '', 'Stacking parallelity', 'Basepair parallelity'] + assert origins == expected_origins, "Got: "+ str(origins) def tst_07(model,printing=False): @@ -1625,15 +1545,13 @@ def tst_07(model,printing=False): result_js = """ { - "bond": [ + "Bond": [ { "i_seqs": [ 0, 1 ], "atom_labels": [ - "0", - "1" ], "ideal": 1.451, "model": 1.507, @@ -1670,7 +1588,7 @@ def tst_07(model,printing=False): if not geo_container.has_proxies: geo_container.build_proxies() assert geo_container.has_proxies - assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["nonbonded"]) + assert len(geo_container.proxies_list) == len(entries)-len(geo_container.entries["Nonbonded"])