diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..69489f2b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+notes.txt
+notes.lhs
+dist
+.cabal-sandbox
+cabal.sandbox.config
+cabal.config
+
+# emacs stuff
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+
+# Org-mode
+.org-id-locations
+*_archive
+
+# flymake-mode
+*_flymake.*
+
+# eshell files
+/eshell/history
+/eshell/lastdir
+
+# elpa packages
+/elpa/
+
+# vim stuff
+*.swp
+*.swo
+
+*.key
+_darcs
+darcs*
diff --git a/MissingH.cabal b/MissingH.cabal
index 22978ba6..f9cfbad8 100644
--- a/MissingH.cabal
+++ b/MissingH.cabal
@@ -52,6 +52,8 @@ Library
     Control.Concurrent.Thread.Utils,
     Network.Email.Sendmail,
     Data.CSV,
+    Data.TSV,
+    Data.SeperatingValues.SeperatingValues,
     System.Cmd.Utils,
     Data.BinPacking,
     Data.Progress.Tracker,
diff --git a/README b/README
index fa17e858..714ceb5a 100644
--- a/README
+++ b/README
@@ -175,6 +175,8 @@ MissingH.Str * Leading/trailing whitespace removal
 
 MissingH.Str.CSV        * Parsing of comma-separated value (CSV) files
 
+MissingH.Str.TSV        * Parsing of tab-separated value (TSV) files
+
 MissingH.Threads        * Threaded callbacks
 
 MissingH.Time           * Utilities for working with times and dates
diff --git a/src/Data/CSV.hs b/src/Data/CSV.hs
index e44f70d1..037531c7 100644
--- a/src/Data/CSV.hs
+++ b/src/Data/CSV.hs
@@ -1,4 +1,4 @@
-{- arch-tag: CSV and TSV utilities
+{- arch-tag: CSV utilities
 Copyright (c) 2005-2011 John Goerzen
 
 All rights reserved.
@@ -22,28 +22,13 @@ Written by John Goerzen, jgoerzen\@complete.org
 
 module Data.CSV (csvFile, genCsvFile) where
 import Text.ParserCombinators.Parsec
-import Data.List (intersperse)
-
-eol :: forall st. GenParser Char st String
-eol = (try $ string "\n\r") <|> (try $ string "\r\n") <|> string "\n" <|>
-      string "\r" <?> "End of line"
+import Data.SeperatingValues.SeperatingValues
 
 cell :: GenParser Char st String
-cell = quotedcell <|> many (noneOf ",\n\r")
-
-quotedchar :: GenParser Char st Char
-quotedchar = noneOf "\""
-             <|> (try $ do string "\"\""
-                           return '"'
-                 )
-quotedcell :: CharParser st String
-quotedcell = do char '"'
-                content <- many quotedchar
-                char '"'
-                return content
+cell = cellOfX ','
 
 line :: GenParser Char st [String]
-line = sepBy cell (char ',')
+line = lineOfX ','
 
 {- | Parse a Comma-Separated Value (CSV) file. The return value is a list of
 lines; each line is a list of cells; and each cell is a String.
@@ -89,14 +74,4 @@ csvFile = endBy line eol
 {- | Generate CSV data for a file. The resulting string can be
 written out to disk directly. -}
 genCsvFile :: [[String]] -> String
-genCsvFile inp =
-    unlines . map csvline $ inp
-    where csvline :: [String] -> String
-          csvline l = concat . intersperse "," . map csvcells $ l
-          csvcells :: String -> String
-          csvcells "" = ""
-          csvcells c = '"' : convcell c ++ "\""
-          convcell :: String -> String
-          convcell c = concatMap convchar c
-          convchar '"' = "\"\""
-          convchar x = [x]
+genCsvFile inp = genXsvFile "," inp
diff --git a/src/Data/SeperatingValues/SeperatingValues.hs b/src/Data/SeperatingValues/SeperatingValues.hs
new file mode 100644
index 00000000..9c3d989d
--- /dev/null
+++ b/src/Data/SeperatingValues/SeperatingValues.hs
@@ -0,0 +1,64 @@
+{- |
+   Module     : Data.SeperatingValues.SeperatingValues
+   Copyright  : Copyright (C) 2005-2011 John Goerzen
+   License    : BSD3
+
+   Maintainer : John Goerzen
+   Stability  : provisional
+   Portability: portable
+
+Misc/Helper Haskell Parsec parsers for any(X)-separated values (XSV) files.
+
+Written by: Aistis Raulinaitis, sheganinans@gmail.com
+-}
+
+module Data.SeperatingValues.SeperatingValues(eol, cellOfX, quotedchar, lineOfX, genXsvFile) where
+
+import Text.ParserCombinators.Parsec
+import Data.List (intersperse)
+
+-- | Match one end-of-line sequence: LF CR, CR LF, a lone LF or a lone CR.
+eol :: forall st. GenParser Char st String
+eol = (try $ string "\n\r") <|> (try $ string "\r\n") <|> string "\n" <|>
+      string "\r" <?> "End of line"
+
+-- | Parse one cell: either a quoted cell, or a (possibly empty) run of
+-- characters up to the separator @x@ or an end-of-line character.
+cellOfX :: Char -> GenParser Char st String
+cellOfX x = quotedcell <|> many (noneOf (x : "\n\r"))
+
+-- | One character inside a quoted cell; two consecutive double quotes
+-- stand for a single literal double quote.
+quotedchar :: GenParser Char st Char
+quotedchar = noneOf "\""
+             <|> (try $ do string "\"\""
+                           return '"'
+                 )
+
+-- | A cell enclosed in double quotes; the enclosing quotes are dropped.
+quotedcell :: CharParser st String
+quotedcell = do char '"'
+                content <- many quotedchar
+                char '"'
+                return content
+
+-- | Parse one line: cells separated by the character @x@.
+lineOfX :: Char -> GenParser Char st [String]
+lineOfX x = sepBy (cellOfX x) (char x)
+
+{- | Generate separated-value data for a file, joining the cells of each row
+with the separator string @x@ and ending each row with a newline.  Empty cells
+are emitted as-is; every other cell is wrapped in double quotes, with embedded
+double quotes doubled. -}
+genXsvFile :: String -> [[String]] -> String
+genXsvFile x inp =
+    unlines . map xsvline $ inp
+    where xsvline :: [String] -> String
+          xsvline l = concat . intersperse x . map xsvcells $ l
+          xsvcells :: String -> String
+          xsvcells "" = ""
+          xsvcells c = '"' : convcell c ++ "\""
+          convcell :: String -> String
+          convcell c = concatMap convchar c
+          convchar '"' = "\"\""
+          convchar ch = [ch]
diff --git a/src/Data/TSV.hs b/src/Data/TSV.hs
new file mode 100644
index 00000000..98b10586
--- /dev/null
+++ b/src/Data/TSV.hs
@@ -0,0 +1,72 @@
+{- |
+   Module     : Data.TSV
+   Copyright  : Copyright (C) 2005-2011 John Goerzen
+   License    : BSD3
+
+   Maintainer : John Goerzen
+   Stability  : provisional
+   Portability: portable
+
+Haskell Parsec parsers for tab-separated value (TSV) files.
+
+Written by: Aistis Raulinaitis, sheganinans@gmail.com
+-}
+module Data.TSV where
+
+import Text.ParserCombinators.Parsec
+import Data.SeperatingValues.SeperatingValues
+
+-- | Parser for a single tab-separated cell.
+cell :: GenParser Char st String
+cell = cellOfX '\t'
+
+-- | Parser for one line of tab-separated cells.
+line :: GenParser Char st [String]
+line = lineOfX '\t'
+
+{- | Parse a Tab-Separated Value (TSV) file. The return value is a list of
+lines; each line is a list of cells; and each cell is a String.
+
+Please note that TSV files may have a different number of cells on each line.
+Also, it is impossible to distinguish a TSV line that has a cell with no data
+from a TSV line that has no cells.
+
+Here are some examples:
+
+>Input (\t is a literal tab)      Parses As (Haskell String syntax)
+>-------------------------------- ---------------------------------
+
+>1\t2\t3                          [["1", "2", "3"]]
+>
+>l1                               [["l1"], ["l2"]]
+>l2
+>
+> (empty line)                    [[""]]
+>
+>NQ\t"Quoted"                     [["NQ", "Quoted"]]
+>
+>NQ\t"Embedded""Quote"            [["NQ", "Embedded\"Quote"]]
+
+To parse a String, you might use:
+
+>import Text.ParserCombinators.Parsec
+>import Data.TSV
+>....
+>parse tsvFile "" mystring
+
+To parse a file, you might instead use:
+
+>do result <- parseFromFile tsvFile "/path/to/file"
+
+Please note that the result of parsing will be of type
+(Either ParseError [[String]]). A Left result indicates an error.
+For more details, see the Parsec information.
+-}
+
+tsvFile :: CharParser st [[String]]
+tsvFile = endBy line eol
+
+{- | Generate TSV data for a file. The resulting string can be
+written out to disk directly. -}
+genTsvFile :: [[String]] -> String
+genTsvFile inp = genXsvFile "\t" inp
diff --git a/testsrc/Str/TSVtest.hs b/testsrc/Str/TSVtest.hs
new file mode 100644
index 00000000..e411214e
--- /dev/null
+++ b/testsrc/Str/TSVtest.hs
@@ -0,0 +1,33 @@
+{- arch-tag: TSV tests main file
+Copyright (C) 2005-2011 John Goerzen
+
+All rights reserved.
+
+For license and copyright information, see the file LICENSE
+
+-}
+
+module Str.TSVtest(tests) where
+import Test.HUnit
+import Data.TSV
+import Text.ParserCombinators.Parsec
+
+test_tsv =
+    let f inp exp = TestLabel inp $ TestCase $
+                    exp @=? case parse tsvFile "" inp of
+                                  Right x -> Right x
+                                  Left y -> Left (show y)
+        in [
+        f "" (Right []),
+        f "\n" (Right [[""]]),
+        f "1\t2\t3\n" (Right [["1", "2", "3"]]),
+        f "This is a\tTest\tReally\n" (Right [["This is a", "Test", "Really"]]),
+        f "l1\nl2\n" (Right [["l1"], ["l2"]]),
+        f "NQ\t\"Quoted\"\n" (Right [["NQ", "Quoted"]]),
+        f "1Q\t\"\"\"\"\n" (Right [["1Q", "\""]]),
+        f "\t\"\"\n" (Right [["", ""]]),
+        f "\"Embedded\"\"Quote\"\n" (Right [["Embedded\"Quote"]])
+        ]
+
+tests = TestList [TestLabel "tsv" (TestList test_tsv)]
+
diff --git a/testsrc/Tests.hs b/testsrc/Tests.hs
index bb9114c2..6a9368f9 100644
--- a/testsrc/Tests.hs
+++ b/testsrc/Tests.hs
@@ -23,6 +23,7 @@ import qualified HVIOtest
 import qualified HVFStest
 import qualified Timetest
 import qualified Str.CSVtest
+import qualified Str.TSVtest
 import qualified WildMatchtest
 import qualified Globtest
 import qualified ProgressTrackertest
@@ -33,6 +34,7 @@ tests = TestList [TestLabel "test1" test1,
                  TestLabel "List" Listtest.tests,
                  TestLabel "Str" Strtest.tests,
                  TestLabel "CSV" Str.CSVtest.tests,
+                 TestLabel "TSV" Str.TSVtest.tests,
                  TestLabel "Time" Timetest.tests,
                  TestLabel "Map" Maptest.tests,
                  TestLabel "ProgressTracker" ProgressTrackertest.tests,