Skip to content

Commit

Permalink
0.1.3.18. Lowercase insertion code support (#53)
Browse files Browse the repository at this point in the history
  • Loading branch information
tiermak authored Mar 9, 2021
1 parent a6aec57 commit dc5473b
Show file tree
Hide file tree
Showing 11 changed files with 1,851 additions and 32 deletions.
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

## [0.1.3.18] - 2021-03-09
### Fixed
- Lowercase insertion code parsing in PDB.

## [0.1.3.17] - 2021-02-18
### Fixed
- PDB parsing.
Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.3.17
version: 0.1.3.18
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
13 changes: 7 additions & 6 deletions src/Bio/MAE.hs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@ import Bio.Structure (Atom (..), Bond (..), Chain (..), Model
Model (..), Residue (..),
SecondaryStructure (..),
StructureModels (..))
import qualified Bio.Utils.Map as M ((!?!))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Attoparsec.Text (parseOnly)
import Data.Bifunctor (bimap, first)
import Data.Function (on)
import qualified Data.List as L (find, groupBy, sortOn)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, lookup, (!))
import qualified Data.Map.Strict as M (fromList, lookup)
import Data.Maybe (catMaybes, fromJust)
import Data.Text (Text)
import qualified Data.Text as T (head, init, last, null, pack,
Expand Down Expand Up @@ -62,7 +63,7 @@ instance StructureModels Mae where
modelsOf Mae{..} = V.fromList $ fmap blockToModel blocks
where
unsafeGetFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> a
unsafeGetFromContentsMap m name i = unsafeFromMaeValue $ (m M.! name) !! i
unsafeGetFromContentsMap m name i = unsafeFromMaeValue $ (m M.!?! name) !! i

getFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> Maybe a
getFromContentsMap m name i = join $ fromMaeValue . (!! i) <$> name `M.lookup` m
Expand All @@ -71,7 +72,7 @@ instance StructureModels Mae where
blockToModel Block{..} = Model (atomsTableToChains atomsTable) bonds
where
atomsTable = findTable "m_atom"
numberOfAtoms = length $ atomsTable M.! "r_m_x_coord"
numberOfAtoms = length $ atomsTable M.!?! "r_m_x_coord"

bondsTable = findTable "m_bond"
(bondGraph, bonds) = bondsTableToGlobalBonds bondsTable
Expand All @@ -89,7 +90,7 @@ instance StructureModels Mae where
bondsTableToGlobalBonds :: Map Text [MaeValue] -> (Map Int [(Int, Int)], Vector (Bond GlobalID))
bondsTableToGlobalBonds m = bimap toMap V.fromList bonds'
where
numberOfBonds = length $ m M.! "i_m_from"
numberOfBonds = length $ m M.!?! "i_m_from"
bonds' = unzip $ fmap indexToBond [0 .. numberOfBonds - 1]

toMap :: [(Int, (Int, Int))] -> Map Int [(Int, Int)]
Expand Down Expand Up @@ -150,7 +151,7 @@ instance StructureModels Mae where

toLocalBond :: Int -> (Int, Int) -> [Bond LocalID]
toLocalBond x (y, o) | y `elem` group = pure $ Bond (LocalID x)
(LocalID $ globalToLocal M.! y)
(LocalID $ globalToLocal M.!?! y)
o
| otherwise = []

Expand All @@ -161,7 +162,7 @@ instance StructureModels Mae where
indexToAtom i = Atom (GlobalID i)
(i + 1)
(stripQuotes $ getFromContentsI "s_m_pdb_atom_name")
(elIndToElement M.! getFromContentsI "i_m_atomic_number")
(elIndToElement M.!?! getFromContentsI "i_m_atomic_number")
coords
(getFromContents 0 "i_m_formal_charge" i)
(getFromContents 0 "r_m_pdb_tfactor" i)
Expand Down
44 changes: 29 additions & 15 deletions src/Bio/PDB.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,31 +11,45 @@ import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure
import Bio.Structure (Residue(..),
Bond,
Atom(..),
LocalID,
GlobalID(GlobalID, getGlobalID),
SecondaryStructure(Undefined),
Chain(..),
Model(Model, modelChains),
StructureSerializable(..),
StructureModels(..))
import qualified Bio.Utils.Map as M ((!?!))
import qualified Bio.Utils.Vector as V ((!?!))
import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (sort)
import Data.Map (Map)
import qualified Data.Map as M (fromList, (!))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V
import qualified Data.Vector as V (toList,
concat,
fromList,
length)
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)

instance StructureModels PDB.PDB where
modelsOf PDB.PDB {..} = fmap mkModel models
where
mkModel :: PDB.Model -> Model
mkModel model = case length atomToNilBasedIndex == length allModelAtoms of
False -> error "Mapping from PDB id to nil based index must be a bijection."
True -> Model (fmap mkChain model) (restoreModelGlobalBonds atomToNilBasedIndex model)
mkModel model = if length atomToNilBasedIndex == length allModelAtoms
then Model (fmap mkChain model) (restoreModelGlobalBonds atomToNilBasedIndex model)
else error "Mapping from PDB id to nil based index must be a bijection."
where
atomToNilBasedIndex :: Map PDB.Atom Int
atomToNilBasedIndex = M.fromList $ allModelAtoms `zip` [0..]
Expand All @@ -49,27 +63,27 @@ instance StructureModels PDB.PDB where
mkChainName :: PDB.Chain -> Text
mkChainName = T.singleton . PDB.atomChainID . safeFirstAtom

mkChainResidues :: PDB.Chain -> V.Vector Residue
mkChainResidues :: PDB.Chain -> Vector Residue
mkChainResidues chain = V.fromList . fmap (mkResidue (restoreChainLocalBonds chain)) $ groupChainByResidue chain

safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
safeFirstAtom arr | V.length arr > 0 = arr V.! 0
safeFirstAtom :: Vector PDB.Atom -> PDB.Atom
safeFirstAtom arr | V.length arr > 0 = arr V.!?! 0
| otherwise = error "Could not pick first atom"

mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue _ [] = error "Cound not make residue from empty list"
mkResidue localBondsMap atoms' = Residue (T.strip $ PDB.atomResName firstResidueAtom)
(PDB.atomResSeq firstResidueAtom)
(PDB.atomICode firstResidueAtom)
(V.fromList $ mkAtom <$> atoms')
(localBondsMap M.! residueID firstResidueAtom)
(localBondsMap M.!?! residueID firstResidueAtom)
Undefined -- now we do not read secondary structure
"" -- chemical component type?!
where
firstResidueAtom = head atoms'

mkAtom :: PDB.Atom -> Atom
mkAtom atom@PDB.Atom{..} = Atom (GlobalID $ atomToNilBasedIndex M.! atom)
mkAtom atom@PDB.Atom{..} = Atom (GlobalID $ atomToNilBasedIndex M.!?! atom)
atomSerial
(T.strip atomName)
atomElement
Expand All @@ -78,10 +92,10 @@ instance StructureModels PDB.PDB where
atomTempFactor
atomOccupancy

modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], V.Vector Model))
modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = liftIO . fmap modelsFromPDBText . TIO.readFile

modelsFromPDBText :: Text -> Either Text ([PDBWarnings], V.Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText pdbText = do
(warnings, parsedPDB) <- fromTextPDB pdbText
let models = modelsOf parsedPDB
Expand Down
20 changes: 13 additions & 7 deletions src/Bio/PDB/Functions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,25 @@ module Bio.PDB.Functions
( groupChainByResidue
) where

import qualified Bio.PDB.Type as PDB (Atom (..))
import Data.Map (Map)
import qualified Data.Map as M (fromList, (!))
import Data.List (groupBy, sortOn)
import Data.Vector (Vector)
import qualified Data.Vector as V (toList)
import qualified Bio.PDB.Type as PDB (Atom (..))
import qualified Bio.Utils.Map as M ((!?!))

import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList)
import Data.List (groupBy,
sortOn)
import Data.Vector (Vector)
import qualified Data.Vector as V (toList)
import Data.Char (toUpper)

groupChainByResidue :: Vector PDB.Atom -> [[PDB.Atom]]
groupChainByResidue = sortOn (sortOnResidue . head) . groupBy atomsFromSameResidue . V.toList
where
atomsFromSameResidue :: PDB.Atom -> PDB.Atom -> Bool
atomsFromSameResidue atom1 atom2 = PDB.atomResSeq atom1 == PDB.atomResSeq atom2 && PDB.atomICode atom1 == PDB.atomICode atom2

sortOnResidue :: PDB.Atom -> Int
sortOnResidue PDB.Atom{..} = atomSerial * 100 + (insertionCodeSortingCorrections M.! atomICode)
sortOnResidue PDB.Atom{..} = atomSerial * 100 + (insertionCodeSortingCorrections M.!?! toUpper atomICode)

insertionCodeSortingCorrections :: Map Char Int
insertionCodeSortingCorrections = M.fromList $ zip (' ':['A'..'Z']) [0..]
7 changes: 4 additions & 3 deletions src/Bio/Structure/Functions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ import Bio.Structure (Atom (..), Bond (..), Chain (..),
localBonds, residues)
import Control.Lens (Traversal', each, (%~), (&))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, (!), (!?))
import qualified Data.Map.Strict as M (fromList, (!?))
import qualified Bio.Utils.Map as M ((!?!))
import Data.Set (Set)
import qualified Data.Set as S (fromList, notMember, unions)
import Data.Text (Text)
Expand Down Expand Up @@ -84,8 +85,8 @@ removeAtomsFromResidue p r'@Residue{..} = (res, S.fromList $ V.toList $ fmap ato
leaveBond (Bond (LocalID l) (LocalID r) _) = l `notElem` indsToDelete && r `notElem` indsToDelete

modifyBond :: Bond LocalID -> Bond LocalID
modifyBond (Bond (LocalID l) (LocalID r) t) = Bond (LocalID $ oldIndsToNew M.! l)
(LocalID $ oldIndsToNew M.! r)
modifyBond (Bond (LocalID l) (LocalID r) t) = Bond (LocalID $ oldIndsToNew M.!?! l)
(LocalID $ oldIndsToNew M.!?! r)
t

newInd :: Int -> Int
Expand Down
13 changes: 13 additions & 0 deletions src/Bio/Utils/Map.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
module Bio.Utils.Map
( (!?!)
) where


import GHC.Stack (HasCallStack)
import Data.Map.Strict (Map,
(!?))
import Data.Maybe (fromMaybe)

infix 9 !?!
(!?!) :: (HasCallStack, Ord k, Show k, Show a) => Map k a -> k -> a
(!?!) m k = fromMaybe (error $ "cobot-io: No key " ++ show k ++ " in Map: " ++ show m) $ m !? k
17 changes: 17 additions & 0 deletions src/Bio/Utils/Vector.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
module Bio.Utils.Vector
( (!?!)
) where


import GHC.Stack (HasCallStack)
import Data.Vector (Vector)
import qualified Data.Vector as V ((!?),
length)
import Data.Maybe (fromMaybe)

infix 9 !?!
(!?!) :: (HasCallStack, Show a) => Vector a -> Int -> a
(!?!) v i = fromMaybe (error msg) $ v V.!? i
where
msg :: String
msg = "cobot-io: index " ++ show i ++ " is out of bounds. Vector length is : " ++ show (V.length v)
Loading

0 comments on commit dc5473b

Please sign in to comment.