From a6aec5799c7648904bb3334498e8b3eca0ff0071 Mon Sep 17 00:00:00 2001 From: akhoroshev Date: Thu, 18 Feb 2021 11:55:31 +0300 Subject: [PATCH] Fix global atom mapping (#50) * fix global atom mapping * use Atom as type parameter for Bond * version up --- ChangeLog.md | 4 +++ package.yaml | 2 +- src/Bio/PDB.hs | 17 ++++++---- src/Bio/PDB/BondRestoring.hs | 63 ++++++++++++++++++------------------ src/Bio/PDB/Type.hs | 5 ++- 5 files changed, 50 insertions(+), 41 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 16679dc..a8b750a 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,6 +2,10 @@ ## [Unreleased] +## [0.1.3.17] - 2021-02-18 +### Fixed +- PDB parsing. + ## [0.1.3.16] - 2021-02-16 ### Fixed - Bad comment in GB parser. diff --git a/package.yaml b/package.yaml index 6fa928d..5311bd6 100644 --- a/package.yaml +++ b/package.yaml @@ -1,5 +1,5 @@ name: cobot-io -version: 0.1.3.16 +version: 0.1.3.17 github: "biocad/cobot-io" license: BSD3 category: Bio diff --git a/src/Bio/PDB.hs b/src/Bio/PDB.hs index b84f563..42b3f2a 100644 --- a/src/Bio/PDB.hs +++ b/src/Bio/PDB.hs @@ -4,7 +4,8 @@ module Bio.PDB , modelsFromPDBFile, modelsToPDBFile ) where -import Bio.PDB.BondRestoring (residueID, restoreChainLocalBonds, +import Bio.PDB.BondRestoring (residueID, + restoreChainLocalBonds, restoreModelGlobalBonds) import Bio.PDB.Functions (groupChainByResidue) import Bio.PDB.Reader (PDBWarnings, fromTextPDB) @@ -32,13 +33,15 @@ instance StructureModels PDB.PDB where modelsOf PDB.PDB {..} = fmap mkModel models where mkModel :: PDB.Model -> Model - mkModel model = Model (fmap mkChain model) (restoreModelGlobalBonds atomSerialToNilBasedIndex model) + mkModel model = case length atomToNilBasedIndex == length allModelAtoms of + False -> error "Mapping from PDB id to nil based index must be a bijection." + True -> Model (fmap mkChain model) (restoreModelGlobalBonds atomToNilBasedIndex model) where - atomSerialToNilBasedIndex :: Map Int Int - atomSerialToNilBasedIndex = M.fromList $ allModelAtomSerials `zip` [0..] + atomToNilBasedIndex :: Map PDB.Atom Int + atomToNilBasedIndex = M.fromList $ allModelAtoms `zip` [0..] - allModelAtomSerials :: [Int] - allModelAtomSerials = sort . V.toList . fmap PDB.atomSerial . V.concat $ V.toList model + allModelAtoms :: [PDB.Atom] + allModelAtoms = sort . V.toList . V.concat $ V.toList model mkChain :: PDB.Chain -> Chain mkChain = uncurry Chain . (mkChainName &&& mkChainResidues) @@ -66,7 +69,7 @@ instance StructureModels PDB.PDB where firstResidueAtom = head atoms' mkAtom :: PDB.Atom -> Atom - mkAtom PDB.Atom{..} = Atom (GlobalID $ atomSerialToNilBasedIndex M.! atomSerial) + mkAtom atom@PDB.Atom{..} = Atom (GlobalID $ atomToNilBasedIndex M.! atom) atomSerial (T.strip atomName) atomElement diff --git a/src/Bio/PDB/BondRestoring.hs b/src/Bio/PDB/BondRestoring.hs index 717d24c..f246b4a 100644 --- a/src/Bio/PDB/BondRestoring.hs +++ b/src/Bio/PDB/BondRestoring.hs @@ -45,54 +45,54 @@ restoreChainLocalBonds' chainAtoms = residueIDToLocalBonds let _residueID = residueID $ head residueAtoms pure (_residueID, localBonds) - intraResidueGlobalBonds :: [[Bond GlobalID]] + intraResidueGlobalBonds :: [[Bond PDB.Atom]] intraResidueGlobalBonds = fmap restoreIntraResidueBonds chainAtomsGroupedByResidue chainAtomsGroupedByResidue :: [[PDB.Atom]] chainAtomsGroupedByResidue = groupChainByResidue chainAtoms - convertGlobalsToLocals :: [PDB.Atom] -> [Bond GlobalID] -> [Bond LocalID] + convertGlobalsToLocals :: [PDB.Atom] -> [Bond PDB.Atom] -> [Bond LocalID] convertGlobalsToLocals residueAtoms = map convertGlobalToLocal where - convertGlobalToLocal :: Bond GlobalID -> Bond LocalID - convertGlobalToLocal (Bond (GlobalID from) (GlobalID to) order) = - Bond (LocalID $ globalToLocalIdxMap ! from) (LocalID $ globalToLocalIdxMap ! to) order + convertGlobalToLocal :: Bond PDB.Atom -> Bond LocalID + convertGlobalToLocal (Bond from to order) = + Bond (LocalID $ atomToLocalIdMap ! from) (LocalID $ atomToLocalIdMap ! to) order - globalToLocalIdxMap :: Map Int Int - globalToLocalIdxMap = M.fromList $ zip sortedGlobalIndices [0..] + atomToLocalIdMap :: Map PDB.Atom Int + atomToLocalIdMap = M.fromList $ zip sortedAtoms [0..] - sortedGlobalIndices :: [Int] - sortedGlobalIndices = map PDB.atomSerial $ sort residueAtoms + sortedAtoms :: [PDB.Atom] + sortedAtoms = sort residueAtoms -restoreModelGlobalBonds :: Map Int Int -> Vector (Vector PDB.Atom) -> Vector (Bond GlobalID) -restoreModelGlobalBonds atomSerialToNilBasedIndex chains = convertGlobalIDs atomSerialToNilBasedIndex . V.fromList $ _intraResidueBonds ++ peptideBonds ++ disulfideBonds +restoreModelGlobalBonds :: Map PDB.Atom Int -> Vector (Vector PDB.Atom) -> Vector (Bond GlobalID) +restoreModelGlobalBonds atomToNilBasedIndex chains = convertToGlobalIDs atomToNilBasedIndex . V.fromList $ _intraResidueBonds ++ peptideBonds ++ disulfideBonds where - convertGlobalIDs :: Map Int Int -> Vector (Bond GlobalID) -> Vector (Bond GlobalID) - convertGlobalIDs mapping = reindexBonds (\(GlobalID v) -> GlobalID $ mapping ! v) + convertToGlobalIDs :: Map PDB.Atom Int -> Vector (Bond PDB.Atom) -> Vector (Bond GlobalID) + convertToGlobalIDs mapping = reindexBonds (\atom -> GlobalID $ mapping ! atom) - reindexBonds :: (a -> a) -> Vector (Bond a) -> Vector (Bond a) + reindexBonds :: (a -> b) -> Vector (Bond a) -> Vector (Bond b) reindexBonds convertID = fmap (\(Bond from to order) -> Bond (convertID from) (convertID to) order) chainAtomsGroupedByResidue :: Vector [[PDB.Atom]] chainAtomsGroupedByResidue = fmap groupChainByResidue chains - _intraResidueBonds :: [Bond GlobalID] + _intraResidueBonds :: [Bond PDB.Atom] _intraResidueBonds = concatMap restoreChainIntraResidueBonds chainAtomsGroupedByResidue - peptideBonds :: [Bond GlobalID] + peptideBonds :: [Bond PDB.Atom] peptideBonds = concatMap restoreChainPeptideBonds chainAtomsGroupedByResidue - disulfideBonds :: [Bond GlobalID] + disulfideBonds :: [Bond PDB.Atom] disulfideBonds = restoreDisulfideBonds . concat $ V.toList chainAtomsGroupedByResidue -restoreDisulfideBonds :: [[PDB.Atom]] -> [Bond GlobalID] +restoreDisulfideBonds :: [[PDB.Atom]] -> [Bond PDB.Atom] restoreDisulfideBonds atomsGroupedByResidue = do atom1 <- cystineSulfur atom2 <- cystineSulfur guard (PDB.atomSerial atom1 < PDB.atomSerial atom2) guard $ distance (coords atom1) (coords atom2) < sulfidicBondMaxLength - pure $ Bond (GlobalID $ PDB.atomSerial atom1) (GlobalID $ PDB.atomSerial atom2) 1 + pure $ Bond atom1 atom2 1 where cystineSulfur :: [PDB.Atom] cystineSulfur = filter (("SG" ==) . T.strip . PDB.atomName) $ concat cystines @@ -110,16 +110,16 @@ sulfidicBondMaxLength = 2.56 peptideBondMaxLength :: Float peptideBondMaxLength = 1.5 -restoreChainPeptideBonds :: [[PDB.Atom]] -> [Bond GlobalID] +restoreChainPeptideBonds :: [[PDB.Atom]] -> [Bond PDB.Atom] restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptideBonds' atomsGroupedByResidue mempty where - restoreChainPeptideBonds' :: [[PDB.Atom]] -> [Maybe (Bond GlobalID)] -> [Maybe (Bond GlobalID)] + restoreChainPeptideBonds' :: [[PDB.Atom]] -> [Maybe (Bond PDB.Atom)] -> [Maybe (Bond PDB.Atom)] restoreChainPeptideBonds' [] acc = acc restoreChainPeptideBonds' [_] acc = acc restoreChainPeptideBonds' (residue1:residue2:residues) acc = restoreChainPeptideBonds' (residue2:residues) (constructBond residue1 residue2 : acc) - constructBond :: [PDB.Atom] -> [PDB.Atom] -> Maybe (Bond GlobalID) + constructBond :: [PDB.Atom] -> [PDB.Atom] -> Maybe (Bond PDB.Atom) constructBond residue1 residue2 = do carbonAtom1 <- getAtomByName residue1 "C" nitrogenAtom2 <- getAtomByName residue2 "N" @@ -128,27 +128,26 @@ restoreChainPeptideBonds atomsGroupedByResidue = catMaybes $ restoreChainPeptide -- in order not to restore a wrong peptide bond in case of absent residues (gaps) guard $ distance (coords carbonAtom1) (coords nitrogenAtom2) < peptideBondMaxLength - pure $ Bond (GlobalID $ PDB.atomSerial carbonAtom1) (GlobalID $ PDB.atomSerial nitrogenAtom2) 1 + pure $ Bond carbonAtom1 nitrogenAtom2 1 getAtomByName :: [PDB.Atom] -> Text -> Maybe PDB.Atom getAtomByName atoms atomNameToFind = find ((atomNameToFind ==) . T.strip . PDB.atomName) atoms - -restoreChainIntraResidueBonds :: [[PDB.Atom]] -> [Bond GlobalID] +restoreChainIntraResidueBonds :: [[PDB.Atom]] -> [Bond PDB.Atom] restoreChainIntraResidueBonds = concatMap restoreIntraResidueBonds -restoreIntraResidueBonds :: [PDB.Atom] -> [Bond GlobalID] +restoreIntraResidueBonds :: [PDB.Atom] -> [Bond PDB.Atom] restoreIntraResidueBonds residueAtoms = catMaybes $ constructBond <$> residueBonds where -- TODO: support bond order somehow - constructBond :: (Text, Text) -> Maybe (Bond GlobalID) - constructBond (fromAtomName, toAtomName) = Bond <$> constructGlobalID fromAtomName <*> constructGlobalID toAtomName <*> Just 1 + constructBond :: (Text, Text) -> Maybe (Bond PDB.Atom) + constructBond (fromAtomName, toAtomName) = Bond <$> constructAtom fromAtomName <*> constructAtom toAtomName <*> Just 1 - constructGlobalID :: Text -> Maybe GlobalID - constructGlobalID atomName = GlobalID <$> atomNameToIndex !? atomName + constructAtom :: Text -> Maybe PDB.Atom + constructAtom atomName = atomNameToAtom !? atomName - atomNameToIndex :: Map Text Int - atomNameToIndex = M.fromList $ (\PDB.Atom{..} -> (T.strip atomName, atomSerial)) <$> residueAtoms + atomNameToAtom :: Map Text PDB.Atom + atomNameToAtom = M.fromList $ (\atom@PDB.Atom{..} -> (T.strip atomName, atom)) <$> residueAtoms residueBonds :: [(Text, Text)] residueBonds = intraResidueBonds . T.strip . PDB.atomResName $ head residueAtoms diff --git a/src/Bio/PDB/Type.hs b/src/Bio/PDB/Type.hs index 43bd13c..fdec2ab 100644 --- a/src/Bio/PDB/Type.hs +++ b/src/Bio/PDB/Type.hs @@ -93,5 +93,8 @@ data Atom = Atom { atomSerial :: Int -- ^ Atom serial number. } deriving (Show, Eq, Generic, NFData) +-- | We cannot use only atomSerial as key because there +-- | can be two atoms with the same atomSerial in different chains +-- instance Ord Atom where - a1 <= a2 = atomSerial a1 <= atomSerial a2 + a1 <= a2 = (atomSerial a1, atomChainID a1) <= (atomSerial a2, atomChainID a2)