Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version 0.1.5.4 Fix Fasta parser with modification #74

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## [Unreleased]

## [0.1.5.4] - 2024-05-16
- Fix Fasta parser for unknown modifications at the end of the line.

## [0.1.5.3] - 2023-12-08
- Update tests and dependencies.

Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.5.3
version: 0.1.5.4
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
2 changes: 1 addition & 1 deletion src/Bio/FASTA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import Bio.FASTA.Writer (WritableFastaToken (..), fastaToText)

-- | Reads a 'Fasta' from the given file.
--
-- The file contents are obtained with 'readFile' and parsed with 'fastaP', using the
-- base name of the file ('takeBaseName') as the source name passed to 'parse'.
-- If parsing fails, the action calls 'fail' with the message produced by
-- 'errorBundlePretty'; otherwise the parsed value is returned.
--
fromFile :: (MonadFail m, MonadIO m) => FilePath -> m (Fasta Char)
fromFile :: (MonadFail m, MonadIO m, ParsableFastaToken a) => FilePath -> m (Fasta a)
fromFile f = liftIO (readFile f) >>= either (fail . errorBundlePretty) pure . parse fastaP (takeBaseName f)

-- | Writes 'FastaSequence' to file.
Expand Down
8 changes: 7 additions & 1 deletion src/Bio/FASTA/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ type Parser = Parsec Void Text
-- | Runs the given parser over a piece of text.
--
-- A parse failure is rendered to a 'String' with 'errorBundlePretty'. The source name
-- reported in error messages is always @input.fasta@.
parseOnly :: Parsec Void Text a -> Text -> Either String a
parseOnly p = first errorBundlePretty . parse p "input.fasta"

-- Space consumer used by 'lexeme'.
--
-- We use 'hspace1' rather than plain 'space1' because the 'fastaLine' parser expects
-- every line to end with a line ending or with end of file. If 'sc' consumed the end of
-- line, then 'lexeme' in 'unknownP' would consume it as well, and 'fastaLine' would not
-- know that the line had ended and would expect more symbols.
--
-- 'hspace1' consumes only "horizontal" space, leaving the line ending for 'fastaLine'.
sc :: Parser ()
sc = L.space space1 empty empty
sc = L.space hspace1 empty empty

-- | Wraps a parser with 'L.lexeme', using 'sc' as the space consumer.
lexeme :: Parser a -> Parser a
lexeme tokenP = L.lexeme sc tokenP
Expand Down
2 changes: 2 additions & 0 deletions test/FASTA/order10.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>mol1
[FAM]ACGT[UNK][
8 changes: 8 additions & 0 deletions test/FASTA/order9.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>mol1
[FAM]ACGT[UNK]

>mol2
[HEX]ACCGT

>mol3
[HEX]ACGTCA[UNK]
20 changes: 16 additions & 4 deletions test/FASTASpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import Prelude hiding (readFile, writeFile)
import System.Directory (removeFile)
import Test.Hspec

import Bio.FASTA (fastaP, fromFile, toFile)
import Bio.FASTA (ParsableFastaToken, fastaP, fromFile, toFile)
import Bio.FASTA.Parser (parseOnly)
import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.FASTA.Type (Fasta, FastaItem (..), ModItem (..), Modification (..))
import Bio.Sequence (bareSequence)

correctFasta1 :: Fasta Char
Expand Down Expand Up @@ -45,6 +45,16 @@ badFasta7 = Left "input.fasta:2:1:\n |\n2 | 5\8217-CTTCAAGAGAGAGACCTGCGT-3\8217
badFasta8 :: Either String (Fasta Char)
badFasta8 = Left "input.fasta:21:5:\n |\n21 | CMV + enhMCK + prcTnT-2\r\n | ^^\nunexpected \"+ \"\nexpecting end of input, end of line, or letter\n"

-- | Expected parse result for @test/FASTA/order9.fasta@: three records whose
-- sequences combine plain letters with unknown modifications.
correctFasta9 :: Fasta ModItem
correctFasta9 =
  [ FastaItem "mol1" $ bareSequence (unknown "[FAM]" : letters "ACGT" <> [unknown "[UNK]"])
  , FastaItem "mol2" $ bareSequence (unknown "[HEX]" : letters "ACCGT")
  , FastaItem "mol3" $ bareSequence (unknown "[HEX]" : letters "ACGTCA" <> [unknown "[UNK]"])
  ]
  where
    -- A modification token that the parser keeps verbatim, brackets included.
    unknown = Mod . Unknown
    -- One 'Letter' token per character.
    letters = map Letter

-- | Expected parse failure for @test/FASTA/order10.fasta@, whose sequence line ends
-- with an unclosed @[@.
--
-- NOTE(review): unlike 'badFasta8', this message contains no spaces; presumably the
-- comparison in 'parseBadFile' ignores whitespace — confirm against its full body.
badFasta10 :: Either String (Fasta ModItem)
badFasta10 = Left "input.fasta:2:16:\n|\n2|[FAM]ACGT[UNK][\n|^\nunexpectednewline\nexpectingmodificationname\n"

fastaSpec :: Spec
fastaSpec = describe "Fasta files parser" $ do
describe "fromFile" $ do
Expand All @@ -56,19 +66,21 @@ fastaSpec = describe "Fasta files parser" $ do
parseBadFile "test/FASTA/order6.fasta" badFasta6
parseBadFile "test/FASTA/order7.fasta" badFasta7
parseBadFile "test/FASTA/order8.fasta" badFasta8
parseFile "test/FASTA/order9.fasta" correctFasta9
parseBadFile "test/FASTA/order10.fasta" badFasta10

describe "toFile" $ do
writeFile "test/FASTA/input.fasta" correctFasta5
writeFile "test/FASTA/input.fasta" correctFasta1
writeFile "test/FASTA/input.fasta" correctFasta3

-- | Builds a spec item that reads the FASTA file at the given path with 'fromFile'
-- and checks, via 'shouldBe', that the result equals the expected value @cf@.
parseFile :: FilePath -> Fasta Char -> Spec
parseFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Fasta a -> Spec
parseFile path cf =
it ("correctly parses good fasta from file " <> path) $ do
fasta <- fromFile path
fasta `shouldBe` cf

parseBadFile :: FilePath -> Either String (Fasta Char) -> Spec
parseBadFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Either String (Fasta a) -> Spec
parseBadFile path cf =
it ("correctly parses bad fasta from file " <> path) $ do
res <- liftIO (readFile path)
Expand Down
Loading