diff --git a/src/biotite/structure/io/pdb/file.py b/src/biotite/structure/io/pdb/file.py
index 6d192dac6..77d66ca6b 100644
--- a/src/biotite/structure/io/pdb/file.py
+++ b/src/biotite/structure/io/pdb/file.py
@@ -7,6 +7,7 @@
 __all__ = ["PDBFile"]
 
 import warnings
+from collections import namedtuple
 import numpy as np
 from biotite.file import InvalidFileError, TextFile
 from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
@@ -53,6 +54,8 @@
 _alpha = slice(33, 40)
 _beta = slice(40, 47)
 _gamma = slice(47, 54)
+_space = slice(55, 66)
+_z = slice(66, 70)
 
 
 class PDBFile(TextFile):
@@ -545,6 +548,47 @@ def get_structure(
 
         return array
 
+    def get_space_group(self):
+        """
+        Extract the space group and Z value from the CRYST1 record.
+
+        If the file has no CRYST1 record, the default values
+        ``"P 1"`` and ``1`` are returned.
+
+        Returns
+        -------
+        space_group : str
+            The extracted space group.
+        z_val : int
+            The extracted Z value.
+
+        Raises
+        ------
+        InvalidFileError
+            If the Z value of the CRYST1 record is not an integer.
+        """
+        # Initialize the namedtuple
+        SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])
+
+        # Default values, used if the file has no CRYST1 record
+        space_group = "P 1"
+        z_val = 1
+
+        # CRYST1 is a one-time record so we can extract it directly
+        for line in self.lines:
+            if line.startswith("CRYST1"):
+                try:
+                    # Extract space group and Z value
+                    space_group = str(line[_space])
+                    z_val = int(line[_z])
+                except ValueError:
+                    # File contains invalid 'CRYST1' record
+                    raise InvalidFileError(
+                        "File does not contain valid space group and/or Z values"
+                    )
+                break
+        return SpaceGroupInfo(space_group=space_group, z_val=z_val)
+
     def set_structure(self, array, hybrid36=False):
         """
         Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
@@ -700,6 +744,35 @@ def set_structure(self, array, hybrid36=False):
 
         self._index_models_and_atoms()
 
+    def set_space_group(self, info):
+        """
+        Update the CRYST1 record with the provided space group and Z value.
+
+        Parameters
+        ----------
+        info : tuple(str, int) or SpaceGroupInfo
+            Contains the space group and Z-value.
+        """
+        for i, line in enumerate(self.lines):
+            if line.startswith("CRYST1"):
+                try:
+                    # Unpack instead of using attribute access, so that
+                    # plain tuples work as well as SpaceGroupInfo
+                    space_group, z_val = info
+                    # Format the replacement string
+                    space_group_str = space_group.ljust(11)
+                    z_val_str = str(z_val).rjust(4)
+
+                    # Replace the existing CRYST1 record
+                    self.lines[i] = line[:55] + space_group_str + z_val_str + line[70:]
+                except (TypeError, ValueError, AttributeError) as e:
+                    # Raise an exception with context
+                    raise AttributeError(
+                        f"Failed to update CRYST1 record. "
+                        f"Line: {line.strip()} | Error: {e}"
+                    )
+                break
+
     def list_assemblies(self):
         """
         List the biological assemblies that are available for the
diff --git a/tests/structure/io/test_pdb.py b/tests/structure/io/test_pdb.py
index 3b1ef07a6..faec00fb9 100644
--- a/tests/structure/io/test_pdb.py
+++ b/tests/structure/io/test_pdb.py
@@ -77,6 +77,45 @@ def test_array_conversion(path, model, hybrid36, include_bonds):
     assert array1.coord.tolist() == array2.coord.tolist()
 
 
+@pytest.mark.parametrize(
+    "path",
+    glob.glob(join(data_dir("structure"), "*.pdb")),
+)
+def test_space_group(path):
+    """
+    Test the preservation of space group information and structure
+    when reading and writing a PDB file.
+
+    Parameters
+    ----------
+    path : str
+        Path to the PDB file.
+    """
+    # Read the PDB file
+    pdb_file = pdb.PDBFile.read(path)
+    print(f"Testing file: {path}")
+
+    try:
+        # Extract structure and space group
+        stack1 = pdb_file.get_structure(model=1)
+        cryst1 = pdb_file.get_space_group()
+    except biotite.InvalidFileError:
+        raise
+
+    # Write the structure and space group back to a new PDB file
+    pdb_file = pdb.PDBFile()
+    pdb_file.set_structure(stack1)
+    pdb_file.set_space_group(cryst1)
+
+    # Re-read the structure and space group
+    stack2 = pdb_file.get_structure(model=1)
+    cryst2 = pdb_file.get_space_group()
+
+    # Assertions to check if the original and new data match
+    assert stack1 == stack2, "Structure mismatch after writing and reading."
+    assert cryst1 == cryst2, "Space group mismatch after writing and reading."
+
+
 @pytest.mark.parametrize(
     "path, model",
     itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]),