From 147145338d3785ae70fc72aa0c4167ec082346e5 Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Wed, 27 Nov 2024 13:36:42 +0000 Subject: [PATCH 01/10] add and methods --- src/biotite/structure/io/pdb/file.py | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 6d192dac6..67a2a2f2a 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -7,6 +7,7 @@ __all__ = ["PDBFile"] import warnings +from collections import namedtuple import numpy as np from biotite.file import InvalidFileError, TextFile from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat @@ -53,6 +54,8 @@ _alpha = slice(33, 40) _beta = slice(40, 47) _gamma = slice(47, 54) +_space = slice(55, 66) +_z = slice(66, 70) class PDBFile(TextFile): @@ -545,6 +548,37 @@ def get_structure( return array + def get_space_group(self): + """ + Extract the space group and Z value from the CRYST1 record. + + Returns + ------- + SpaceGroupInfo : A namedtuple + space_group (str): The extracted space group, or "P 1" if not found. + z_val (int): The extracted Z value, or 1 if not found. + """ + # Initialize the namedtuple + SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"]) + + # CRYST1 is a one-time record so we can extract it directly + for line in self.lines: + if line.startswith("CRYST1"): + try: + # Extract space group and Z value + space_group = str(line[_space]) + z_val = int(line[_z]) + except ValueError: + # File contains invalid 'CRYST1' record + warnings.warn( + "File contains invalid 'CRYST1' record, using defaults" + ) + # Set default values + space_group = "P 1" + z_val = 1 + break + return SpaceGroupInfo(space_group=space_group, z_val=z_val) + def set_structure(self, array, hybrid36=False): """ Set the :class:`AtomArray` or :class:`AtomArrayStack` for the @@ -700,6 +734,33 @@ def set_structure(self, array, hybrid36=False): self._index_models_and_atoms() + def set_space_group(self, add_info): + """ + Update the CRYST1 record with the provided space group and Z value. + + Parameters + ---------- + add_info : namedtuple or similar + Containes 'space_group' (str) and 'z_val' (int) attributes + """ + for i, line in enumerate(self.lines): + if line.startswith("CRYST1"): + try: + # Format the replacement string + space_group_str = add_info.space_group.ljust(11) + z_val_str = str(add_info.z_val).rjust(4) + + # Replace the existing CRYST1 record + self.lines[i] = line[:55] + space_group_str + z_val_str + line[70:] + except (ValueError, AttributeError): + # File contains invalid 'CRYST1' record + warnings.warn( + "File contains invalid 'CRYST1' record; defaulting to 'P 1'" + "space group and '1' Z value" + ) + break + break + def list_assemblies(self): """ List the biological assemblies that are available for the From 043949b1ac1dc72150d7fbbf397d0ac4ed5be646 Mon Sep 17 00:00:00 2001 From: Alex Brueckner Date: Mon, 2 Dec 2024 09:28:27 -0500 Subject: [PATCH 02/10] Clean parameter details Co-authored-by: Patrick Kunzmann --- src/biotite/structure/io/pdb/file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 67a2a2f2a..c345fb25a 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -740,8 +740,8 @@ def set_space_group(self, add_info): Parameters ---------- - add_info : namedtuple or similar - Containes 'space_group' (str) and 'z_val' (int) attributes + info : tuple(str, int) or SpaceGroupInfo + Contains the space group and Z-value. """ for i, line in enumerate(self.lines): if line.startswith("CRYST1"): From 14908fc3ff38266ee2c556133d6a97518b2e5b51 Mon Sep 17 00:00:00 2001 From: Alex Brueckner Date: Mon, 2 Dec 2024 09:28:53 -0500 Subject: [PATCH 03/10] Change add_info to info for clarity Co-authored-by: Patrick Kunzmann --- src/biotite/structure/io/pdb/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index c345fb25a..0dab6c295 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -734,7 +734,7 @@ def set_structure(self, array, hybrid36=False): self._index_models_and_atoms() - def set_space_group(self, add_info): + def set_space_group(self, info): """ Update the CRYST1 record with the provided space group and Z value. From 5af30abb5f20f945b1156fea830c1e78fb1e045b Mon Sep 17 00:00:00 2001 From: Alex Brueckner Date: Mon, 2 Dec 2024 09:29:39 -0500 Subject: [PATCH 04/10] Clean numpydoc description Co-authored-by: Patrick Kunzmann --- src/biotite/structure/io/pdb/file.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 0dab6c295..92f8ed961 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -554,9 +554,10 @@ def get_space_group(self): Returns ------- - SpaceGroupInfo : A namedtuple - space_group (str): The extracted space group, or "P 1" if not found. - z_val (int): The extracted Z value, or 1 if not found. + space_group : str + The extracted space group. + z_val : int + The extracted Z value. """ # Initialize the namedtuple SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"]) From c3691b12cf1c4e753477485ee0eba27f523b5d1b Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Thu, 5 Dec 2024 16:13:46 +0000 Subject: [PATCH 05/10] update variable names --- src/biotite/structure/io/pdb/file.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 92f8ed961..360738c71 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -748,8 +748,8 @@ def set_space_group(self, info): if line.startswith("CRYST1"): try: # Format the replacement string - space_group_str = add_info.space_group.ljust(11) - z_val_str = str(add_info.z_val).rjust(4) + space_group_str = info.space_group.ljust(11) + z_val_str = str(info.z_val).rjust(4) # Replace the existing CRYST1 record self.lines[i] = line[:55] + space_group_str + z_val_str + line[70:] From baf02a9b26eac6602754a989eb1a8b38e89c6a2a Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Thu, 5 Dec 2024 17:55:16 +0000 Subject: [PATCH 06/10] add space group test --- tests/structure/io/test_pdb.py | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index 3b1ef07a6..947de6757 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -76,6 +76,52 @@ def test_array_conversion(path, model, hybrid36, include_bonds): ) assert array1.coord.tolist() == array2.coord.tolist() +@pytest.mark.parametrize( + "path, model", + itertools.product( + glob.glob(join(data_dir("structure"), "*.pdb")), + [None, 1, -1] + ), +) +def test_space_group(path, model): + """ + Test the preservation of space group information and structure + when reading and writing a PDB file. + + Parameters + ---------- + path : str + Path to the PDB file. + model : int or None + Model index for multi-model PDB files, or None to include all models. + """ + # Read the PDB file + pdb_file = pdb.PDBFile.read(path) + print(f"Testing file: {path}, model: {model}") + + try: + # Extract structure and space group + stack1 = pdb_file.get_structure(model=model) # Removed duplicate argument + cryst1 = pdb_file.get_space_group() + except biotite.InvalidFileError: + # If parsing fails for model=None due to mismatched atom counts, skip the test + if model is None: + pytest.skip("Skipping test due to incompatible atom counts across models.") + else: + raise # Re-raise the error for other cases + + # Write the structure and space group back to a new PDB file + pdb_file = pdb.PDBFile() + pdb.set_structure(pdb_file, stack1) + pdb.PDBFile.set_space_group(pdb_file, cryst1) + + # Re-read the structure and space group + stack2 = pdb_file.get_structure(model=model) + cryst2 = pdb_file.get_space_group() + + # Assertions to check if the original and new data match + assert stack1 == stack2, "Structure mismatch after writing and reading." + assert cryst1 == cryst2, "Space group mismatch after writing and reading." @pytest.mark.parametrize( "path, model", From 2d00a540c813a5cce4dd226106746c4b5baf8b60 Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Thu, 5 Dec 2024 18:09:04 +0000 Subject: [PATCH 07/10] raise exception if CRYST1 cannot be altered --- src/biotite/structure/io/pdb/file.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index 360738c71..d2b21ef3d 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -571,8 +571,8 @@ def get_space_group(self): z_val = int(line[_z]) except ValueError: # File contains invalid 'CRYST1' record - warnings.warn( - "File contains invalid 'CRYST1' record, using defaults" + raise InvalidFileError( + "File does not contain assembly information (REMARK 300)" ) # Set default values space_group = "P 1" @@ -753,13 +753,12 @@ def set_space_group(self, info): # Replace the existing CRYST1 record self.lines[i] = line[:55] + space_group_str + z_val_str + line[70:] - except (ValueError, AttributeError): - # File contains invalid 'CRYST1' record - warnings.warn( - "File contains invalid 'CRYST1' record; defaulting to 'P 1'" - "space group and '1' Z value" + except (ValueError, AttributeError) as e: + # Raise an exception with context + raise AttributeError( + f"Failed to update CRYST1 record. " + f"Line: {line.strip()} | Error: {e}" ) - break break def list_assemblies(self): From 7b876510fc9f41772da22d0ca28d0e18f098a355 Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Thu, 5 Dec 2024 18:11:26 +0000 Subject: [PATCH 08/10] fix linter issues --- src/biotite/structure/io/pdb/file.py | 2 +- tests/structure/io/test_pdb.py | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py index d2b21ef3d..77d66ca6b 100644 --- a/src/biotite/structure/io/pdb/file.py +++ b/src/biotite/structure/io/pdb/file.py @@ -572,7 +572,7 @@ def get_space_group(self): except ValueError: # File contains invalid 'CRYST1' record raise InvalidFileError( - "File does not contain assembly information (REMARK 300)" + "File does not contain valid space group and/or Z values" ) # Set default values space_group = "P 1" diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index 947de6757..af8ff4de6 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -76,18 +76,16 @@ def test_array_conversion(path, model, hybrid36, include_bonds): ) assert array1.coord.tolist() == array2.coord.tolist() + @pytest.mark.parametrize( "path, model", - itertools.product( - glob.glob(join(data_dir("structure"), "*.pdb")), - [None, 1, -1] - ), + itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]), ) def test_space_group(path, model): """ - Test the preservation of space group information and structure + Test the preservation of space group information and structure when reading and writing a PDB file. - + Parameters ---------- path : str @@ -123,6 +121,7 @@ def test_space_group(path, model): assert stack1 == stack2, "Structure mismatch after writing and reading." assert cryst1 == cryst2, "Space group mismatch after writing and reading." + @pytest.mark.parametrize( "path, model", itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]), From 37cfb46573612308abcae611b6bb0841a9336824 Mon Sep 17 00:00:00 2001 From: Alex Brueckner Date: Fri, 20 Dec 2024 09:10:23 -0500 Subject: [PATCH 09/10] Update tests/structure/io/test_pdb.py Code conciseness updates Co-authored-by: Patrick Kunzmann --- tests/structure/io/test_pdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index af8ff4de6..da74cb2ae 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -110,8 +110,8 @@ def test_space_group(path, model): # Write the structure and space group back to a new PDB file pdb_file = pdb.PDBFile() - pdb.set_structure(pdb_file, stack1) - pdb.PDBFile.set_space_group(pdb_file, cryst1) + pdb_file.set_structure(stack1) + pdb_file.set_space_group(cryst1) # Re-read the structure and space group stack2 = pdb_file.get_structure(model=model) From ded3998fa5483f6b6823b744a93286c4da3aa86a Mon Sep 17 00:00:00 2001 From: alex-sbaq Date: Fri, 20 Dec 2024 16:31:51 +0000 Subject: [PATCH 10/10] refined test_pdb.py --- tests/structure/io/test_pdb.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py index da74cb2ae..faec00fb9 100644 --- a/tests/structure/io/test_pdb.py +++ b/tests/structure/io/test_pdb.py @@ -78,10 +78,10 @@ def test_array_conversion(path, model, hybrid36, include_bonds): @pytest.mark.parametrize( - "path, model", - itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]), + "path", + glob.glob(join(data_dir("structure"), "*.pdb")), ) -def test_space_group(path, model): +def test_space_group(path): """ Test the preservation of space group information and structure when reading and writing a PDB file. @@ -90,23 +90,17 @@ def test_space_group(path, model): ---------- path : str Path to the PDB file. - model : int or None - Model index for multi-model PDB files, or None to include all models. """ # Read the PDB file pdb_file = pdb.PDBFile.read(path) - print(f"Testing file: {path}, model: {model}") + print(f"Testing file: {path}") try: # Extract structure and space group - stack1 = pdb_file.get_structure(model=model) # Removed duplicate argument + stack1 = pdb_file.get_structure(model=1) cryst1 = pdb_file.get_space_group() except biotite.InvalidFileError: - # If parsing fails for model=None due to mismatched atom counts, skip the test - if model is None: - pytest.skip("Skipping test due to incompatible atom counts across models.") - else: - raise # Re-raise the error for other cases + raise # Write the structure and space group back to a new PDB file pdb_file = pdb.PDBFile() @@ -114,7 +108,7 @@ def test_space_group(path, model): pdb_file.set_space_group(cryst1) # Re-read the structure and space group - stack2 = pdb_file.get_structure(model=model) + stack2 = pdb_file.get_structure(model=1) cryst2 = pdb_file.get_space_group() # Assertions to check if the original and new data match