Skip to content

Commit

Permalink
Fix global atom mapping (#50)
Browse files Browse the repository at this point in the history
* fix global atom mapping

* use Atom as type parameter for Bond

* version up
  • Loading branch information
akhoroshev authored Feb 18, 2021
1 parent 7d9018a commit a6aec57
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 41 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.1.3.17] - 2021-02-18
### Fixed
- PDB parsing.

## [0.1.3.16] - 2021-02-16
### Fixed
- Bad comment in GB parser.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.3.16
version: 0.1.3.17
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
17 changes: 10 additions & 7 deletions src/Bio/PDB.hs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ module Bio.PDB
, modelsFromPDBFile, modelsToPDBFile
) where

import Bio.PDB.BondRestoring (residueID, restoreChainLocalBonds,
import Bio.PDB.BondRestoring (residueID,
restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
Expand Down Expand Up @@ -32,13 +33,15 @@ instance StructureModels PDB.PDB where
modelsOf PDB.PDB {..} = fmap mkModel models
where
mkModel :: PDB.Model -> Model
mkModel model = Model (fmap mkChain model) (restoreModelGlobalBonds atomSerialToNilBasedIndex model)
mkModel model = case length atomToNilBasedIndex == length allModelAtoms of
False -> error "Mapping from PDB id to nil based index must be a bijection."
True -> Model (fmap mkChain model) (restoreModelGlobalBonds atomToNilBasedIndex model)
where
atomSerialToNilBasedIndex :: Map Int Int
atomSerialToNilBasedIndex = M.fromList $ allModelAtomSerials `zip` [0..]
atomToNilBasedIndex :: Map PDB.Atom Int
atomToNilBasedIndex = M.fromList $ allModelAtoms `zip` [0..]

allModelAtomSerials :: [Int]
allModelAtomSerials = sort . V.toList . fmap PDB.atomSerial . V.concat $ V.toList model
allModelAtoms :: [PDB.Atom]
allModelAtoms = sort . V.toList . V.concat $ V.toList model

mkChain :: PDB.Chain -> Chain
mkChain = uncurry Chain . (mkChainName &&& mkChainResidues)
Expand Down Expand Up @@ -66,7 +69,7 @@ instance StructureModels PDB.PDB where
firstResidueAtom = head atoms'

mkAtom :: PDB.Atom -> Atom
mkAtom PDB.Atom{..} = Atom (GlobalID $ atomSerialToNilBasedIndex M.! atomSerial)
mkAtom atom@PDB.Atom{..} = Atom (GlobalID $ atomToNilBasedIndex M.! atom)
atomSerial
(T.strip atomName)
atomElement
Expand Down
63 changes: 31 additions & 32 deletions src/Bio/PDB/BondRestoring.hs
Original file line number Diff line number Diff line change
Expand Up @@ -45,54 +45,54 @@ restoreChainLocalBonds' chainAtoms = residueIDToLocalBonds
let _residueID = residueID $ head residueAtoms
pure (_residueID, localBonds)

intraResidueGlobalBonds :: [[Bond GlobalID]]
intraResidueGlobalBonds :: [[Bond PDB.Atom]]
intraResidueGlobalBonds = fmap restoreIntraResidueBonds chainAtomsGroupedByResidue

chainAtomsGroupedByResidue :: [[PDB.Atom]]
chainAtomsGroupedByResidue = groupChainByResidue chainAtoms

convertGlobalsToLocals :: [PDB.Atom] -> [Bond GlobalID] -> [Bond LocalID]
convertGlobalsToLocals :: [PDB.Atom] -> [Bond PDB.Atom] -> [Bond LocalID]
convertGlobalsToLocals residueAtoms = map convertGlobalToLocal
where
convertGlobalToLocal :: Bond GlobalID -> Bond LocalID
convertGlobalToLocal (Bond (GlobalID from) (GlobalID to) order) =
Bond (LocalID $ globalToLocalIdxMap ! from) (LocalID $ globalToLocalIdxMap ! to) order
convertGlobalToLocal :: Bond PDB.Atom -> Bond LocalID
convertGlobalToLocal (Bond from to order) =
Bond (LocalID $ atomToLocalIdMap ! from) (LocalID $ atomToLocalIdMap ! to) order

globalToLocalIdxMap :: Map Int Int
globalToLocalIdxMap = M.fromList $ zip sortedGlobalIndices [0..]
atomToLocalIdMap :: Map PDB.Atom Int
atomToLocalIdMap = M.fromList $ zip sortedAtoms [0..]

sortedGlobalIndices :: [Int]
sortedGlobalIndices = map PDB.atomSerial $ sort residueAtoms
sortedAtoms :: [PDB.Atom]
sortedAtoms = sort residueAtoms


restoreModelGlobalBonds :: Map Int Int -> Vector (Vector PDB.Atom) -> Vector (Bond GlobalID)
restoreModelGlobalBonds atomSerialToNilBasedIndex chains = convertGlobalIDs atomSerialToNilBasedIndex . V.fromList $ _intraResidueBonds ++ peptideBonds ++ disulfideBonds
restoreModelGlobalBonds :: Map PDB.Atom Int -> Vector (Vector PDB.Atom) -> Vector (Bond GlobalID)
restoreModelGlobalBonds atomToNilBasedIndex chains = convertToGlobalIDs atomToNilBasedIndex . V.fromList $ _intraResidueBonds ++ peptideBonds ++ disulfideBonds
where
convertGlobalIDs :: Map Int Int -> Vector (Bond GlobalID) -> Vector (Bond GlobalID)
convertGlobalIDs mapping = reindexBonds (\(GlobalID v) -> GlobalID $ mapping ! v)
convertToGlobalIDs :: Map PDB.Atom Int -> Vector (Bond PDB.Atom) -> Vector (Bond GlobalID)
convertToGlobalIDs mapping = reindexBonds (\atom -> GlobalID $ mapping ! atom)

reindexBonds :: (a -> a) -> Vector (Bond a) -> Vector (Bond a)
reindexBonds :: (a -> b) -> Vector (Bond a) -> Vector (Bond b)
reindexBonds convertID = fmap (\(Bond from to order) -> Bond (convertID from) (convertID to) order)

chainAtomsGroupedByResidue :: Vector [[PDB.Atom]]
chainAtomsGroupedByResidue = fmap groupChainByResidue chains

_intraResidueBonds :: [Bond GlobalID]
_intraResidueBonds :: [Bond PDB.Atom]
_intraResidueBonds = concatMap restoreChainIntraResidueBonds chainAtomsGroupedByResidue

peptideBonds :: [Bond GlobalID]
peptideBonds :: [Bond PDB.Atom]
peptideBonds = concatMap restoreChainPeptideBonds chainAtomsGroupedByResidue

disulfideBonds :: [Bond GlobalID]
disulfideBonds :: [Bond PDB.Atom]
disulfideBonds = restoreDisulfideBonds . concat $ V.toList chainAtomsGroupedByResidue

restoreDisulfideBonds :: [[PDB.Atom]] -> [Bond GlobalID]
restoreDisulfideBonds :: [[PDB.Atom]] -> [Bond PDB.Atom]
restoreDisulfideBonds atomsGroupedByResidue = do
atom1 <- cystineSulfur
atom2 <- cystineSulfur
guard (PDB.atomSerial atom1 < PDB.atomSerial atom2)
guard $ distance (coords atom1) (coords atom2) < sulfidicBondMaxLength
pure $ Bond (GlobalID $ PDB.atomSerial atom1) (GlobalID $ PDB.atomSerial atom2) 1
pure $ Bond atom1 atom2 1
where
cystineSulfur :: [PDB.Atom]
cystineSulfur = filter (("SG" ==) . T.strip . PDB.atomName) $ concat cystines
Expand All @@ -110,16 +110,16 @@ sulfidicBondMaxLength = 2.56
peptideBondMaxLength :: Float
peptideBondMaxLength = 1.5

restoreChainPeptideBonds :: [[PDB.Atom]] -> [Bond GlobalID]
restoreChainPeptideBonds :: [[PDB.Atom]] -> [Bond PDB.Atom]
restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptideBonds' atomsGroupedByResidue mempty
where
restoreChainPeptideBonds' :: [[PDB.Atom]] -> [Maybe (Bond GlobalID)] -> [Maybe (Bond GlobalID)]
restoreChainPeptideBonds' :: [[PDB.Atom]] -> [Maybe (Bond PDB.Atom)] -> [Maybe (Bond PDB.Atom)]
restoreChainPeptideBonds' [] acc = acc
restoreChainPeptideBonds' [_] acc = acc
restoreChainPeptideBonds' (residue1:residue2:residues) acc =
restoreChainPeptideBonds' (residue2:residues) (constructBond residue1 residue2 : acc)

constructBond :: [PDB.Atom] -> [PDB.Atom] -> Maybe (Bond GlobalID)
constructBond :: [PDB.Atom] -> [PDB.Atom] -> Maybe (Bond PDB.Atom)
constructBond residue1 residue2 = do
carbonAtom1 <- getAtomByName residue1 "C"
nitrogenAtom2 <- getAtomByName residue2 "N"
Expand All @@ -128,27 +128,26 @@ restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptide
-- in order not to restore a wrong peptide bond in case of absent residues (gaps)
guard $ distance (coords carbonAtom1) (coords nitrogenAtom2) < peptideBondMaxLength

pure $ Bond (GlobalID $ PDB.atomSerial carbonAtom1) (GlobalID $ PDB.atomSerial nitrogenAtom2) 1
pure $ Bond carbonAtom1 nitrogenAtom2 1

getAtomByName :: [PDB.Atom] -> Text -> Maybe PDB.Atom
getAtomByName atoms atomNameToFind = find ((atomNameToFind ==) . T.strip . PDB.atomName) atoms


restoreChainIntraResidueBonds :: [[PDB.Atom]] -> [Bond GlobalID]
restoreChainIntraResidueBonds :: [[PDB.Atom]] -> [Bond PDB.Atom]
restoreChainIntraResidueBonds = concatMap restoreIntraResidueBonds

restoreIntraResidueBonds :: [PDB.Atom] -> [Bond GlobalID]
restoreIntraResidueBonds :: [PDB.Atom] -> [Bond PDB.Atom]
restoreIntraResidueBonds residueAtoms = catMaybes $ constructBond <$> residueBonds
where
-- TODO: support bond order somehow
constructBond :: (Text, Text) -> Maybe (Bond GlobalID)
constructBond (fromAtomName, toAtomName) = Bond <$> constructGlobalID fromAtomName <*> constructGlobalID toAtomName <*> Just 1
constructBond :: (Text, Text) -> Maybe (Bond PDB.Atom)
constructBond (fromAtomName, toAtomName) = Bond <$> constructAtom fromAtomName <*> constructAtom toAtomName <*> Just 1

constructGlobalID :: Text -> Maybe GlobalID
constructGlobalID atomName = GlobalID <$> atomNameToIndex !? atomName
constructAtom :: Text -> Maybe PDB.Atom
constructAtom atomName = atomNameToAtom !? atomName

atomNameToIndex :: Map Text Int
atomNameToIndex = M.fromList $ (\PDB.Atom{..} -> (T.strip atomName, atomSerial)) <$> residueAtoms
atomNameToAtom :: Map Text PDB.Atom
atomNameToAtom = M.fromList $ (\atom@PDB.Atom{..} -> (T.strip atomName, atom)) <$> residueAtoms

residueBonds :: [(Text, Text)]
residueBonds = intraResidueBonds . T.strip . PDB.atomResName $ head residueAtoms
Expand Down
5 changes: 4 additions & 1 deletion src/Bio/PDB/Type.hs
Original file line number Diff line number Diff line change
Expand Up @@ -93,5 +93,8 @@ data Atom = Atom { atomSerial :: Int -- ^ Atom serial number.
}
deriving (Show, Eq, Generic, NFData)

-- | We cannot use only atomSerial as key because there
-- | can be two atoms with the same atomSerial in different chains
--
instance Ord Atom where
a1 <= a2 = atomSerial a1 <= atomSerial a2
a1 <= a2 = (atomSerial a1, atomChainID a1) <= (atomSerial a2, atomChainID a2)

0 comments on commit a6aec57

Please sign in to comment.