Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for space group information when reading and writing pdb files #707

Merged
merged 10 commits into from
Dec 22, 2024
61 changes: 61 additions & 0 deletions src/biotite/structure/io/pdb/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
__all__ = ["PDBFile"]

import warnings
from collections import namedtuple
import numpy as np
from biotite.file import InvalidFileError, TextFile
from biotite.structure.atoms import AtomArray, AtomArrayStack, repeat
Expand Down Expand Up @@ -53,6 +54,8 @@
_alpha = slice(33, 40)
_beta = slice(40, 47)
_gamma = slice(47, 54)
# Column ranges of the space group and Z value fields;
# these slices are applied to lines starting with 'CRYST1'
_space = slice(55, 66)
_z = slice(66, 70)


class PDBFile(TextFile):
Expand Down Expand Up @@ -545,6 +548,38 @@ def get_structure(

return array

def get_space_group(self):
    """
    Extract the space group and Z value from the CRYST1 record.

    Returns
    -------
    space_group : str
        The extracted space group.
        This is the raw content of the fixed-width space group field,
        i.e. padding whitespace is not stripped.
    z_val : int
        The extracted Z value.

    Raises
    ------
    InvalidFileError
        If the file has no CRYST1 record or if the Z value of the
        record cannot be parsed as integer.

    Notes
    -----
    Both values are returned together as a ``SpaceGroupInfo`` named
    tuple with the fields ``space_group`` and ``z_val``.
    """
    SpaceGroupInfo = namedtuple("SpaceGroupInfo", ["space_group", "z_val"])

    # CRYST1 is a one-time record, so only its first occurrence is read
    for line in self.lines:
        if line.startswith("CRYST1"):
            space_group = str(line[_space])
            try:
                # A truncated record yields an empty field,
                # which fails the integer conversion as well
                z_val = int(line[_z])
            except ValueError:
                # File contains invalid 'CRYST1' record
                raise InvalidFileError(
                    "File does not contain valid space group and/or Z values"
                )
            return SpaceGroupInfo(space_group=space_group, z_val=z_val)

    # Without this explicit error the method would fail with an
    # unrelated 'UnboundLocalError' on files lacking the record
    raise InvalidFileError("File has no 'CRYST1' record")

def set_structure(self, array, hybrid36=False):
"""
Set the :class:`AtomArray` or :class:`AtomArrayStack` for the
Expand Down Expand Up @@ -700,6 +735,32 @@ def set_structure(self, array, hybrid36=False):

self._index_models_and_atoms()

def set_space_group(self, info):
    """
    Update the CRYST1 record with the provided space group and Z value.

    Parameters
    ----------
    info : tuple(str, int) or SpaceGroupInfo
        Contains the space group and Z-value.
        Either an object with the attributes ``space_group`` and
        ``z_val`` or a plain ``(space_group, z_val)`` tuple.

    Raises
    ------
    AttributeError
        If `info` provides neither the expected attributes nor can be
        unpacked into two values, or if the space group is not a string.

    Notes
    -----
    Only the first CRYST1 record is updated.
    If the file contains no CRYST1 record, the file is left unchanged.
    """
    for i, line in enumerate(self.lines):
        if not line.startswith("CRYST1"):
            continue

        try:
            try:
                space_group, z_val = info.space_group, info.z_val
            except AttributeError:
                # Plain tuples have no named fields -> unpack by position
                space_group, z_val = info
            # Format the replacement string
            space_group_str = space_group.ljust(11)
            z_val_str = str(z_val).rjust(4)
        except (ValueError, TypeError, AttributeError) as e:
            # Raise an exception with context
            raise AttributeError(
                f"Failed to update CRYST1 record. "
                f"Line: {line.strip()} | Error: {e}"
            ) from e

        # Pad truncated records, so that the space group always starts
        # at the same column, then replace the existing CRYST1 record
        prefix = line[:55].ljust(55)
        self.lines[i] = prefix + space_group_str + z_val_str + line[70:]
        break

def list_assemblies(self):
"""
List the biological assemblies that are available for the
Expand Down
39 changes: 39 additions & 0 deletions tests/structure/io/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,45 @@ def test_array_conversion(path, model, hybrid36, include_bonds):
assert array1.coord.tolist() == array2.coord.tolist()


@pytest.mark.parametrize(
    "path",
    glob.glob(join(data_dir("structure"), "*.pdb")),
)
def test_space_group(path):
    """
    Test the preservation of space group information and structure
    when reading and writing a PDB file.

    Parameters
    ----------
    path : str
        Path to the PDB file.
    """
    # Any error raised while reading simply propagates and fails the test
    ref_file = pdb.PDBFile.read(path)
    ref_atoms = ref_file.get_structure(model=1)
    ref_info = ref_file.get_space_group()

    # Write the structure and space group into a new PDB file.
    # 'set_space_group()' only modifies an already existing CRYST1 record
    # (presumably written by 'set_structure()'), hence it is called last
    test_file = pdb.PDBFile()
    test_file.set_structure(ref_atoms)
    test_file.set_space_group(ref_info)

    # Re-read the structure and space group
    test_atoms = test_file.get_structure(model=1)
    test_info = test_file.get_space_group()

    # Check if the original and new data match
    assert ref_atoms == test_atoms, "Structure mismatch after writing and reading."
    assert ref_info == test_info, "Space group mismatch after writing and reading."


@pytest.mark.parametrize(
"path, model",
itertools.product(glob.glob(join(data_dir("structure"), "*.pdb")), [None, 1, -1]),
Expand Down
Loading