From 3a46d6964d778ff5784c697aeac4a2fa03d1a43d Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Thu, 8 Jun 2023 21:39:32 +0300 Subject: [PATCH] Add text-iso8601 --- .github/workflows/haskell-ci.yml | 9 +- .hlint.yaml | 8 + Makefile | 2 +- aeson.cabal | 2 +- attoparsec-iso8601/attoparsec-iso8601.cabal | 2 +- .../src/Data/Attoparsec/Time.hs | 4 + cabal.project | 1 + text-iso8601/LICENSE | 30 + text-iso8601/bench/text-iso8601-bench.hs | 23 + text-iso8601/changelog.md | 3 + text-iso8601/src/Data/Time/FromText.hs | 548 ++++++++++++++++++ text-iso8601/src/Data/Time/ToText.hs | 144 +++++ text-iso8601/tests/text-iso8601-tests.hs | 103 ++++ text-iso8601/text-iso8601.cabal | 89 +++ 14 files changed, 964 insertions(+), 4 deletions(-) create mode 100644 text-iso8601/LICENSE create mode 100644 text-iso8601/bench/text-iso8601-bench.hs create mode 100644 text-iso8601/changelog.md create mode 100644 text-iso8601/src/Data/Time/FromText.hs create mode 100644 text-iso8601/src/Data/Time/ToText.hs create mode 100644 text-iso8601/tests/text-iso8601-tests.hs create mode 100644 text-iso8601/text-iso8601.cabal diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 6dfcd45ce..f9f9515e4 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -213,6 +213,7 @@ jobs: touch cabal.project echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project echo "packages: $GITHUB_WORKSPACE/source/attoparsec-iso8601" >> cabal.project + echo "packages: $GITHUB_WORKSPACE/source/text-iso8601" >> cabal.project echo "packages: $GITHUB_WORKSPACE/source/examples" >> cabal.project echo "packages: $GITHUB_WORKSPACE/source/benchmarks" >> cabal.project cat cabal.project @@ -230,6 +231,8 @@ jobs: echo "PKGDIR_aeson=${PKGDIR_aeson}" >> "$GITHUB_ENV" PKGDIR_attoparsec_iso8601="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/attoparsec-iso8601-[0-9.]*')" echo "PKGDIR_attoparsec_iso8601=${PKGDIR_attoparsec_iso8601}" >> "$GITHUB_ENV" + PKGDIR_text_iso8601="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/text-iso8601-[0-9.]*')" + echo "PKGDIR_text_iso8601=${PKGDIR_text_iso8601}" >> "$GITHUB_ENV" PKGDIR_aeson_examples="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/aeson-examples-[0-9.]*')" echo "PKGDIR_aeson_examples=${PKGDIR_aeson_examples}" >> "$GITHUB_ENV" PKGDIR_aeson_benchmarks="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/aeson-benchmarks-[0-9.]*')" @@ -239,12 +242,15 @@ jobs: touch cabal.project.local echo "packages: ${PKGDIR_aeson}" >> cabal.project echo "packages: ${PKGDIR_attoparsec_iso8601}" >> cabal.project + echo "packages: ${PKGDIR_text_iso8601}" >> cabal.project echo "packages: ${PKGDIR_aeson_examples}" >> cabal.project echo "packages: ${PKGDIR_aeson_benchmarks}" >> cabal.project echo "package aeson" >> cabal.project echo " ghc-options: -Werror=missing-methods" >> cabal.project echo "package attoparsec-iso8601" >> cabal.project echo " ghc-options: -Werror=missing-methods" >> cabal.project + echo "package text-iso8601" >> cabal.project + echo " ghc-options: -Werror=missing-methods" >> cabal.project echo "package aeson-examples" >> cabal.project echo " ghc-options: -Werror=missing-methods" >> cabal.project echo "package aeson-benchmarks" >> cabal.project @@ -252,7 +258,7 @@ jobs: cat >> cabal.project <> cabal.project.local + $HCPKG list --simple-output --names-only | perl -ne 'for (split /\s+/) { print "constraints: $_ installed\n" unless /^(aeson|aeson-benchmarks|aeson-examples|attoparsec-iso8601|text-iso8601)$/; }' >> cabal.project.local cat cabal.project cat cabal.project.local - name: dump install plan @@ -282,6 +288,7 @@ jobs: run: | if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_aeson} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 src attoparsec-iso8601/src src-pure) ; fi if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_attoparsec_iso8601} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 src) ; fi + if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_text_iso8601} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 src) ; fi if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_aeson_examples} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 src/) ; fi if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_aeson_benchmarks} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 .) ; fi if [ $((HCNUMVER >= 90200 && HCNUMVER < 90400)) -ne 0 ] ; then (cd ${PKGDIR_aeson_benchmarks} && hlint -h ${GITHUB_WORKSPACE}/source/.hlint.yaml -XHaskell2010 bench examples/src) ; fi diff --git a/.hlint.yaml b/.hlint.yaml index 2cd36aebb..ed1d6380a 100644 --- a/.hlint.yaml +++ b/.hlint.yaml @@ -13,6 +13,14 @@ name: "Use <=<" within: - Data.Aeson.Types.FromJSON +- ignore: + name: "Avoid lambda" + within: + - Data.Time.FromText +- ignore: + name: "Use isDigit" + within: + - Data.Time.FromText # CPP confuses - ignore: diff --git a/Makefile b/Makefile index 24deb0178..f0ea6f42f 100644 --- a/Makefile +++ b/Makefile @@ -1,2 +1,2 @@ lint: - ./run-hlint.sh --cpp-include include/ src/ attoparsec-iso8601/ benchmarks/ examples/ src-pure/ tests/ + ./run-hlint.sh --cpp-include include/ src/ attoparsec-iso8601/ benchmarks/ examples/ src-pure/ tests/ text-iso8601/src text-iso8601/tests diff --git a/aeson.cabal b/aeson.cabal index 794845f7a..3ff0ec52c 100644 --- a/aeson.cabal +++ b/aeson.cabal @@ -22,7 +22,7 @@ tested-with: || ==9.6.2 synopsis: Fast JSON parsing and encoding -cabal-version: >=1.10 +cabal-version: 1.12 homepage: https://github.com/haskell/aeson bug-reports: https://github.com/haskell/aeson/issues build-type: Simple diff --git a/attoparsec-iso8601/attoparsec-iso8601.cabal b/attoparsec-iso8601/attoparsec-iso8601.cabal index 6b57bca61..300927859 100644 --- a/attoparsec-iso8601/attoparsec-iso8601.cabal +++ b/attoparsec-iso8601/attoparsec-iso8601.cabal @@ -12,7 +12,7 @@ copyright: author: Bryan O'Sullivan maintainer: Adam Bergmark stability: experimental -cabal-version: >=1.10 +cabal-version: 1.12 homepage: https://github.com/haskell/aeson bug-reports: https://github.com/haskell/aeson/issues build-type: Simple diff --git a/attoparsec-iso8601/src/Data/Attoparsec/Time.hs b/attoparsec-iso8601/src/Data/Attoparsec/Time.hs index 9bba12b7f..ce70eb24f 100644 --- a/attoparsec-iso8601/src/Data/Attoparsec/Time.hs +++ b/attoparsec-iso8601/src/Data/Attoparsec/Time.hs @@ -133,6 +133,9 @@ seconds = do -- | Parse a time zone, and return 'Nothing' if the offset from UTC is -- zero. (This makes some speedups possible.) +-- +-- The accepted formats are @Z@, @+HH@, @+HHMM@, or @+HH:MM@. +-- timeZone :: Parser (Maybe Local.TimeZone) timeZone = do ch <- satisfy $ \c -> c == 'Z' || c == '+' || c == '-' @@ -175,6 +178,7 @@ utcTime = do in return (UTCTime d tt) Just tz -> return $! Local.localTimeToUTC tz lt + -- | Parse a date with time zone info. Acceptable formats: -- -- @YYYY-MM-DD HH:MM Z@ diff --git a/cabal.project b/cabal.project index ca7375fcb..ae5e47e8d 100644 --- a/cabal.project +++ b/cabal.project @@ -1,6 +1,7 @@ with-compiler: ghc packages: . packages: attoparsec-iso8601 +packages: text-iso8601 packages: examples packages: benchmarks tests: true diff --git a/text-iso8601/LICENSE b/text-iso8601/LICENSE new file mode 100644 index 000000000..fc90c466c --- /dev/null +++ b/text-iso8601/LICENSE @@ -0,0 +1,30 @@ +Copyright (c) 2023 Oleg Grenrus + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the author nor the names of his contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/text-iso8601/bench/text-iso8601-bench.hs b/text-iso8601/bench/text-iso8601-bench.hs new file mode 100644 index 000000000..a4e03b140 --- /dev/null +++ b/text-iso8601/bench/text-iso8601-bench.hs @@ -0,0 +1,23 @@ +{-# LANGUAGE OverloadedStrings #-} +module Main (main) where + +import Data.Text (Text) +import Test.Tasty.Bench (defaultMain, bench, nf) + +import qualified Data.Attoparsec.Text as A +import qualified Data.Attoparsec.Time as A + +import Data.Time.FromText (parseUTCTime) + +main :: IO () +main = defaultMain + [ bench "text" $ nf parseUTCTime input1 + , bench "atto" $ nf (runAtto A.utcTime) input1 + ] + +input1 :: Text +input1 = "2023-06-09T16:53:55Z" +{-# NOINLINE input1 #-} + +runAtto :: A.Parser a -> Text -> Either String a +runAtto p t = A.parseOnly (p <* A.endOfInput) t diff --git a/text-iso8601/changelog.md b/text-iso8601/changelog.md new file mode 100644 index 000000000..0ec150b66 --- /dev/null +++ b/text-iso8601/changelog.md @@ -0,0 +1,3 @@ +# 0.1 + +Initial release diff --git a/text-iso8601/src/Data/Time/FromText.hs b/text-iso8601/src/Data/Time/FromText.hs new file mode 100644 index 000000000..62e106a14 --- /dev/null +++ b/text-iso8601/src/Data/Time/FromText.hs @@ -0,0 +1,548 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE ScopedTypeVariables #-} +-- {-# OPTIONS_GHC -ddump-simpl -dsuppress-all -ddump-to-file #-} +-- | +-- +-- The [RFC3339 grammar](https://datatracker.ietf.org/doc/html/rfc3339#section-5.6) is below +-- +-- @ +-- date-fullyear = 4DIGIT +-- date-month = 2DIGIT ; 01-12 +-- date-mday = 2DIGIT ; 01-28, 01-29, 01-30, 01-31 based on month/year +-- time-hour = 2DIGIT ; 00-23 +-- time-minute = 2DIGIT ; 00-59 +-- time-second = 2DIGIT ; 00-58, 00-59, 00-60 based on leap second rules +-- time-secfrac = "." 1*DIGIT +-- time-numoffset = ("+" / "-") time-hour ":" time-minute +-- time-offset = \"Z" / time-numoffset +-- +-- partial-time = time-hour ":" time-minute ":" time-second [time-secfrac] +-- full-date = date-fullyear "-" date-month "-" date-mday +-- full-time = partial-time time-offset +-- +-- date-time = full-date \"T" full-time +-- @ +-- +-- The parsers are a bit more lenient: +-- +-- * We also accept space instead of @T@ date-time separator. (Allowed by RFC3339, forbidden by ISO8601) +-- +-- * Seconds are optional (allowed by ISO8601) +-- +-- * numerical timezone offset can be just @("+" / "-") time-hour@ or without a colon: @("+" / "-") time-hour time-minute@ (allowed by ISO8601). +-- However we require colons in between hours, minutes and seconds in the time (@partial-time@) production, and dashes in @full-date@ production. +-- +-- * We allow over 4 digits in the year part (and that is a reason to require dashes). +-- +-- * We allow @-00:00@ time offset. (Allowed by RFC3339, forbidden by ISO8601) +-- +-- * We always allow time with 60 seconds, we don't consult any leap second database. +-- +module Data.Time.FromText ( + parseDay, + parseLocalTime, + parseTimeOfDay, + parseTimeZone, + parseUTCTime, + parseZonedTime, + parseYear, + parseMonth, + parseQuarter, + parseQuarterOfYear, +) where + +import Data.Bits ((.&.)) +import Data.Char (ord, chr) +import Data.Fixed (Fixed (..), Pico) +import Data.Integer.Conversion (textToInteger) +import Data.Text.Array (Array) +import Data.Text.Internal (Text (..)) +import GHC.Exts (inline) + +import Data.Time.Calendar (Day, fromGregorianValid) +import Data.Time.Calendar.Compat (Year) +import Data.Time.Calendar.Month.Compat (Month, fromYearMonthValid) +import Data.Time.Calendar.Quarter.Compat (Quarter, QuarterOfYear (..), + fromYearQuarter) +import Data.Time.Clock (UTCTime (..)) + +import qualified Data.Text as T +import qualified Data.Text.Array as A +import qualified Data.Time.LocalTime as Local + +-- The parsing functions here are written in continuation passing style +-- with everything marked INLINE and continuation called with GHC.Exts.inline +-- to try to enforce that whole CPS-business goes away (with slight code-duplication). +-- +-- Using staging would be a nicer way to enforce what we want here, +-- but that would require TemplateHaskell. + +------------------------------------------------------------------------------- +-- Public functions +------------------------------------------------------------------------------- + +-- | Parse a date of the form @[+-]YYYY-MM-DD@. +-- +-- The year must contain at least 4 digits, to avoid the Y2K problem: +-- a two-digit year @YY@ may mean @YY@, @19YY@, or @20YY@, and we make it +-- an error to prevent the ambiguity. +-- Years from @0000@ to @0999@ must thus be zero-padded. +-- The year may have more than 4 digits. +-- +parseDay :: Text -> Either String Day +parseDay = parseDay_ expectingEOF + +-- | Parse a month of the form @[+-]YYYY-MM@. +-- +-- See also 'parseDay' for details about the year format. +parseMonth :: Text -> Either String Month +parseMonth = parseMonth_ $ \y m t -> + case fromYearMonthValid y m of + Nothing -> Left $ "invalid month:" ++ show (y, m) + Just !month -> expectingEOF month t + +-- | Parse a year @[+-]YYYY@, with at least 4 digits. Can include a sign. +-- +-- See also 'parseDay' for details about the year format. +-- +-- Note: 'Year' is a type synonym for 'Integer'. +parseYear :: Text -> Either String Year +parseYear = parseYear_ Right $ \_ c _ -> unexpectedChar c "end-of-input" + +-- | Parse a quarter of the form @[+-]YYYY-QN@. +-- +-- See also 'parseDay' for details about the year format. +-- +parseQuarter :: Text -> Either String Quarter +parseQuarter = parseQuarter_ $ \y q t -> + let !quarter = fromYearQuarter y q in expectingEOF quarter t + +-- | Parse a quarter of year of the form @QN@ or @qN@. +parseQuarterOfYear :: Text -> Either String QuarterOfYear +parseQuarterOfYear = parseQuarterOfYear_ expectingEOF + +-- | Parse a time of the form @HH:MM[:SS[.SSS]]@. +parseTimeOfDay :: Text -> Either String Local.TimeOfDay +parseTimeOfDay = parseTimeOfDay_ kontEOF $ \_ _ _ c _ -> unexpectedChar c "end-of-input" where + kontEOF h m s = makeTimeOfDay h m s Right + +-- | Parse a time zone. +-- +-- The accepted formats are @Z@, @+HH@, @+HHMM@, or @+HH:MM@. (@+@ can be @-@). +-- +parseTimeZone :: Text -> Either String Local.TimeZone +parseTimeZone = parseTimeZone_ Right + +-- | Parse a date and time, of the form @YYYY-MM-DD HH:MM[:SS[.SSS]]@. +-- The space may be replaced with a @T@. The number of seconds is optional +-- and may be followed by a fractional component. +parseLocalTime :: Text -> Either String Local.LocalTime +parseLocalTime = parseLocalTime_ Right $ \_ c _ -> unexpectedChar c "end-of-input" + +-- | Behaves as 'zonedTime', but converts any time zone offset into a +-- UTC time. +parseUTCTime :: Text -> Either String UTCTime +parseUTCTime = parseUTCTime_ Right + +-- | Parse a date with time zone info. Acceptable formats: +-- +-- @ +-- YYYY-MM-DD HH:MMZ +-- YYYY-MM-DD HH:MM:SSZ +-- YYYY-MM-DD HH:MM:SS.SSSZ +-- @ +-- +-- The first space may instead be a @T@, and the second space is +-- optional. The @Z@ represents UTC. The @Z@ may be replaced with a +-- time zone offset of the form @+0000@ or @-08:00@, where the first +-- two digits are hours, the @:@ is optional and the second two digits +-- (also optional) are minutes. +parseZonedTime :: Text -> Either String Local.ZonedTime +parseZonedTime = parseZonedTime_ Right + +------------------------------------------------------------------------------- +-- Uncons +------------------------------------------------------------------------------- + +-- As all characters in the time format are ASCII +-- we can use slightly more efficient (or at least smaller) uncons. + +{-# INLINE unconsAscii_ #-} +unconsAscii_ + :: Array -> Int -> Int + -> Either String r -- ^ EOF continuation + -> (Char -> Int -> Int -> Either String r) -- ^ character continuation + -> Either String r +unconsAscii_ arr off len kontEOF kontC + | len <= 0 = inline kontEOF + | c < 0x80 = inline kontC (chr (fromIntegral c)) (off + 1) (len - 1) + | otherwise = Left "Non-ASCII character" + where + c = A.unsafeIndex arr off + +{-# INLINE unconsAscii #-} +unconsAscii :: Either String r -> (Char -> Text -> Either String r) -> Text -> Either String r +unconsAscii kontEOF kontC (Text arr off len) = + unconsAscii_ arr off len kontEOF $ \c off' len' -> + inline kontC c (Text arr off' len') + +------------------------------------------------------------------------------- +-- Expecting errors +------------------------------------------------------------------------------- + +expectingEOF :: r -> Text -> Either String r +expectingEOF = expectingEOF_ Right +{-# INLINE expectingEOF #-} + +expectingEOF_ :: (a -> Either String r) -> a -> Text -> Either String r +expectingEOF_ kont a t = case T.uncons t of + Nothing -> inline kont a + Just (c, _) -> unexpectedChar c "end-of-input" +{-# INLINE expectingEOF_ #-} + +unexpectedEOF :: String -> Either String r +unexpectedEOF expected = Left $ "Unexpected end-of-input, expecting " ++ expected + +unexpectedChar :: Char -> String -> Either String r +unexpectedChar c expected = Left $ "Unexpected '" ++ c : "', expecting " ++ expected + +------------------------------------------------------------------------------- +-- Helpers +------------------------------------------------------------------------------- + +{-# INLINE fromChar #-} +fromChar :: Char -> Int +fromChar c = ord c .&. 0xf + +{-# INLINE twoDigits #-} +twoDigits + :: (Int -> Text -> Either String r) + -> Text + -> Either String r +twoDigits kont = + unconsAscii (unexpectedEOF "a digit") $ \c1 -> if + | '0' <= c1, c1 <= '9' -> unconsAscii (unexpectedEOF "a digit") $ \c2 -> if + | '0' <= c2, c2 <= '9' -> inline kont (fromChar c1 * 10 + fromChar c2) + | otherwise -> \_ -> unexpectedChar c2 "a digit" + | otherwise -> \_ -> unexpectedChar c1 "a digit" + +{-# INLINE munchDigits #-} +munchDigits + :: (Text -> Either String r) + -> (Text -> Char -> Text -> Either String r) + -> Text + -> Either String r +munchDigits kontEOF kontC (Text arr off len) = + munchDigits_ kontEOF kontC arr off off len + +{-# INLINE munchDigits_ #-} +munchDigits_ + :: (Text -> Either String r) + -> (Text -> Char -> Text -> Either String r) + -> Array + -> Int + -> Int + -> Int + -> Either String r +munchDigits_ kontEOF kontC arr = loop where + loop off0 off len = unconsAscii_ arr off len (inline kontEOF (Text arr off0 (off - off0))) $ \c off' len' -> if + | '0' <= c, c <= '9' -> loop off0 off' len' + | otherwise -> inline kontC (Text arr off0 (off - off0)) c (Text arr off' len') + +utcTimeZone :: Local.TimeZone +utcTimeZone = Local.TimeZone 0 False "" + +------------------------------------------------------------------------------- +-- Implementation: Dates +------------------------------------------------------------------------------- + +-- parse year: @[+-]YYYY@. +-- Two continuations as we look at the following character. +{-# INLINE parseYear_ #-} +parseYear_ + :: forall r. (Year -> Either String r) + -> (Year -> Char -> Text -> Either String r) + -> Text + -> Either String r +parseYear_ kontEOF kontC (Text arr offS lenS) = start offS lenS where + start :: Int -> Int -> Either String r + start !off !len = unconsAscii_ arr off len + (unexpectedEOF "-, +, or a digit") $ \c off' len' -> case c of + '-' -> loop negate off' off' len' + '+' -> loop id off' off' len' + _ + | '0' <= c, c <= '9' -> loop id off off' len' + | otherwise -> Left $ "Unexpected '" ++ show c ++ ", expecting -, +, or a digit" + + loop :: (Integer -> Integer) -> Int -> Int -> Int -> Either String r + loop !posNeg !off0 !off !len = unconsAscii_ arr off len (finishEOF posNeg off0 off) $ \c off' len' -> if + | '0' <= c, c <= '9' -> loop posNeg off0 off' len' + | otherwise -> finishC posNeg c off0 off off' len' + + finishEOF :: (Integer -> Integer) -> Int -> Int -> Either String r + finishEOF !posNeg !off0 !off + | len0 >= 4 + = year `seq` kontEOF year + + | otherwise + = Left "expected year with at least 4 digits" + where + len0 = off - off0 + year = posNeg (textToInteger (Text arr off0 len0)) + {-# INLINE finishEOF #-} + + finishC :: (Integer -> Integer) -> Char -> Int -> Int -> Int -> Int-> Either String r + finishC !posNeg c !off0 !off !off' !len' + | len0 >= 4 + = year `seq` kontC year c (Text arr off' len') + + | otherwise + = Left "expected year with at least 4 digits" + where + len0 = off - off0 + year = posNeg (textToInteger (Text arr off0 len0)) + {-# INLINE finishC #-} + +{-# INLINE parseYear__ #-} +-- parse year and the following dash: @[+-]YYYY-@ +parseYear__ + :: forall r. (Year -> Text -> Either String r) + -> Text + -> Either String r +parseYear__ kont = + parseYear_ (\_ -> unexpectedEOF "a dash after a year part") $ \ !y c t -> + if c == '-' + then kont y t + else unexpectedChar c "a dash after a year part" + +-- parse month: @[-+]YYYY-MM@ +{-# INLINE parseMonth_ #-} +parseMonth_ + :: forall r. (Year -> Int -> Text -> Either String r) + -> Text + -> Either String r +parseMonth_ kont = + parseYear__ $ \ !y -> + twoDigits $ \ !m -> + kont y m + +-- parse day: @[-+]YYYY-MM-DD@ +{-# INLINE parseDay_ #-} +parseDay_ + :: forall r. (Day -> Text -> Either String r) + -> Text + -> Either String r +parseDay_ kont = + parseMonth_ $ \y m -> + skipDash $ + twoDigits $ \d -> + case fromGregorianValid y m d of + Nothing -> \_ -> Left $ "invalid day:" ++ show (y, m, d) + Just !day -> inline kont day + +-- parse quarter: @[+-]YYYY-QN@ +{-# INLINE parseQuarter_ #-} +parseQuarter_ + :: forall r. (Year -> QuarterOfYear -> Text -> Either String r) + -> Text + -> Either String r +parseQuarter_ kont = + parseYear__ $ \y -> + parseQuarterOfYear_ $ \q -> + inline kont y q + +{-# INLINE parseQuarterOfYear_ #-} +parseQuarterOfYear_ + :: forall r. (QuarterOfYear -> Text -> Either String r) + -> Text + -> Either String r +parseQuarterOfYear_ kont = + unconsAscii (unexpectedEOF "QuarterOfYear") $ \c -> if + | 'Q' == c || 'q' == c -> unconsAscii (unexpectedEOF "Quarter digit") $ \c' -> case c' of + '1' -> inline kont Q1 + '2' -> inline kont Q2 + '3' -> inline kont Q3 + '4' -> inline kont Q4 + _ -> \_ -> unexpectedChar c' "QuarterOfYear digit" + + | otherwise -> \_ -> unexpectedChar c "QuarterOfYear" + +{-# INLINE skipDash #-} +skipDash + :: forall r. (Text -> Either String r) + -> Text + -> Either String r +skipDash kont = unconsAscii (unexpectedEOF "a dash, -") $ \c -> + if c == '-' + then inline kont + else \_ -> unexpectedChar c "a dash, -" + +------------------------------------------------------------------------------- +-- Implementation: Time +------------------------------------------------------------------------------- + +-- Parse time of day : @HH:MM[:SS[.SSS]]@ +{-# INLINE parseTimeOfDay_ #-} +parseTimeOfDay_ + :: (Int -> Int -> Pico -> Either String r) + -> (Int -> Int -> Pico -> Char -> Text -> Either String r) + -> Text + -> Either String r +parseTimeOfDay_ kontEOF kontC = + twoDigits $ \h -> + skipColon $ + twoDigits $ \m -> unconsAscii (inline kontEOF h m 0) $ \ c -> + if c == ':' + then parseSeconds_ (inline kontEOF h m) (inline kontC h m) + else inline kontC h m 0 c + +{-# INLINE parseTimeOfDay__ #-} +parseTimeOfDay__ + :: (Local.TimeOfDay -> Either String r) + -> (Local.TimeOfDay -> Char -> Text -> Either String r) + -> Text + -> Either String r +parseTimeOfDay__ kontEOF kontC = parseTimeOfDay_ + (\h m s -> makeTimeOfDay h m s kontEOF) + (\h m s c t -> makeTimeOfDay h m s $ \l -> inline kontC l c t) + +{-# INLINE makeTimeOfDay #-} +makeTimeOfDay :: Int -> Int -> Pico -> (Local.TimeOfDay -> Either String r) -> Either String r +makeTimeOfDay h m s kont = + if h < 24 && m < 60 && s < 61 + then inline kont (Local.TimeOfDay h m s) + else Left $ "Invalid time of day:" ++ show (h,m,s) + +-- Parse seconds: @SS.SSS@. +-- +{-# INLINE parseSeconds_ #-} +parseSeconds_ + :: (Pico -> Either String r) + -> (Pico -> Char -> Text -> Either String r) + -> Text + -> Either String r +parseSeconds_ kontEOF kontC = + twoDigits $ \real -> + unconsAscii (inline kontEOF (fromIntegral real)) $ \c -> + if c == '.' + then munchDigits (\i -> makeSeconds real kontEOF i) (\i c' t -> makeSeconds real (\j -> inline kontC j c' t) i) + else kontC (MkFixed $ toInteger real * pico) c + +{-# INLINE makeSeconds #-} +makeSeconds :: Int -> (Pico -> Either String r) -> Text -> Either String r +makeSeconds real kont t@(Text _arr _off len) + | len == 0 + = Left "Expecting at least one decimal after a dot" + + | len > 12 + = Left "Unexpectedly over twelve decimals" + + | otherwise + = inline kont (MkFixed (toInteger real * pico + textToInteger t * 10 ^ expo)) + where + expo = 12 - len + +{-# INLINE parseTimeZone_ #-} +parseTimeZone_ + :: (Local.TimeZone -> Either String r) + -> Text + -> Either String r +parseTimeZone_ kont = + inline unconsAscii (unexpectedEOF "timezone: Z, +HH:MM or -HH:MM") $ \c t -> + parseTimeZone__ () (\_ -> inline kont) c t + +pico :: Integer +pico = 1000000000000 -- 12 zeros + +{-# INLINE parseTimeZone__ #-} +parseTimeZone__ + :: a -- "extra bit of state" + -> (a -> Local.TimeZone -> Either String r) + -> Char + -> Text + -> Either String r +parseTimeZone__ x kont c t0 = case c of + '-' -> hhmm x negate t0 + '+' -> hhmm x id t0 + 'Z' -> expectingEOF_ (inline kont x) utcTimeZone t0 + _ -> unexpectedChar c "timezone: Z, +HH:MM or -HH:MM" + where + hhmm y posNeg = + twoDigits $ \hh -> + unconsAscii (withResult posNeg hh 0 (kont y)) $ \c1 -> case c1 of + ':' -> + twoDigits $ \mm -> + expectingEOF_ (\mm' -> withResult posNeg hh mm' (kont y)) mm + + _ | '0' <= c1, c1 <= '9' -> + unconsAscii (unexpectedEOF "a digit") $ \c2 -> + if '0' <= c2 && c2 <= '9' + then expectingEOF_ (\mm' -> withResult posNeg hh mm' (kont y)) (fromChar c1 * 10 + fromChar c2) + else \_ -> unexpectedChar c2 "a digit" + + _ -> \_ -> unexpectedChar c1 "colon or a digit" + + withResult :: (Int -> Int) -> Int -> Int -> (Local.TimeZone -> Either String b) -> Either String b + withResult posNeg hh mm kontR = + let off = posNeg (hh * 60 + mm) + in if off < (-720) || off > 840 || mm > 59 + then Left $ "Invalid TimeZone:" ++ show (hh, mm) + else kontR (Local.minutesToTimeZone off) + + +{-# INLINE parseLocalTime_ #-} +parseLocalTime_ + :: (Local.LocalTime -> Either String r) + -> (Local.LocalTime -> Char -> Text -> Either String r) + -> Text + -> Either String r +parseLocalTime_ kontEOF kontC = + parseDay_ $ \d -> + skipDaySeparator $ + parseTimeOfDay__ + (\l -> inline kontEOF (Local.LocalTime d l)) + (\l c t -> inline kontC (Local.LocalTime d l) c t) + +{-# INLINE parseUTCTime_ #-} +parseUTCTime_ + :: (UTCTime -> Either String r) + -> Text + -> Either String r +parseUTCTime_ kont = parseZonedTime_ $ \zt -> inline kont (Local.zonedTimeToUTC zt) + +{-# INLINE parseZonedTime_ #-} +parseZonedTime_ + :: (Local.ZonedTime -> Either String r) + -> Text + -> Either String r +parseZonedTime_ kont = + parseLocalTime_ (\_ -> unexpectedEOF "timezone") $ \lt c -> + parseZT kont lt c + +{-# INLINE parseZT #-} +parseZT + :: (Local.ZonedTime -> Either String r) + -> Local.LocalTime + -> Char -> Text -> Either String r +parseZT kont lt = parseTimeZone__ lt $ \lt' tz -> inline kont (Local.ZonedTime lt' tz) + +{-# INLINE skipColon #-} +skipColon + :: (Text -> Either String r) + -> Text + -> Either String r +skipColon kont = unconsAscii (unexpectedEOF "a colon, :") $ \c -> + if c == ':' + then inline kont + else \_ -> unexpectedChar c "a colon, :" + +{-# INLINE skipDaySeparator #-} +skipDaySeparator + :: (Text -> Either String r) + -> Text + -> Either String r +skipDaySeparator kont = unconsAscii (unexpectedEOF "a day separator, T or space") $ \c -> + if c == 'T' || c == ' ' + then inline kont + else \_ -> unexpectedChar c "a day separator, T or space" diff --git a/text-iso8601/src/Data/Time/ToText.hs b/text-iso8601/src/Data/Time/ToText.hs new file mode 100644 index 000000000..b47bcd29d --- /dev/null +++ b/text-iso8601/src/Data/Time/ToText.hs @@ -0,0 +1,144 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +module Data.Time.ToText ( + buildDay, + buildLocalTime, + buildTimeOfDay, + buildTimeZone, + buildUTCTime, + buildZonedTime, + buildYear, + buildMonth, + buildQuarter, + buildQuarterOfYear, +) where + +import Data.Char (chr) +import Data.Fixed (Fixed (..)) +import Data.Int (Int64) +import Data.Text.Lazy.Builder (Builder) + +import Data.Time (TimeOfDay (..)) +import Data.Time.Calendar (Day, toGregorian) +import Data.Time.Calendar.Compat (Year) +import Data.Time.Calendar.Month.Compat (Month, toYearMonth) +import Data.Time.Calendar.Quarter.Compat (Quarter, QuarterOfYear (..), + toYearQuarter) +import Data.Time.Clock (UTCTime (..)) + +import qualified Data.Text.Lazy.Builder as B +import qualified Data.Text.Lazy.Builder.Int as B (decimal) +import qualified Data.Time.LocalTime as Local + +#if !MIN_VERSION_base(4,11,0) +import Data.Semigroup ((<>)) +#endif + +buildDay :: Day -> Builder +buildDay dd = buildYear yr <> char7 '-' <> digits2 m <> char7 '-' <> digits2 d + where (yr,m,d) = toGregorian dd +{-# INLINE buildDay #-} + +buildMonth :: Month -> Builder +buildMonth mm = buildYear yr <> char7 '-' <> digits2 m + where (yr,m) = toYearMonth mm +{-# INLINE buildMonth #-} + +buildQuarter :: Quarter -> Builder +buildQuarter qq = buildYear yr <> char7 '-' <> buildQuarterOfYear q + where (yr,q) = toYearQuarter qq +{-# INLINE buildQuarter #-} + +buildQuarterOfYear :: QuarterOfYear -> Builder +buildQuarterOfYear q = char7 'q' <> case q of + Q1 -> char7 '1' + Q2 -> char7 '2' + Q3 -> char7 '3' + Q4 -> char7 '4' + +-- | Used in encoding day, month, quarter +buildYear :: Year -> Builder +buildYear y + | y >= 1000 = B.decimal y + | y >= 0 = padYear y + | y >= -999 = char7 '-' <> padYear (negate y) + | otherwise = B.decimal y + where + padYear y' = + let (ab,c) = fromIntegral y' `quotRem` 10 + (a,b) = ab `quotRem` 10 + in char7 '0' <> digit a <> digit b <> digit c +{-# INLINE buildYear #-} + +buildTimeOfDay :: TimeOfDay -> Builder +buildTimeOfDay (TimeOfDay h m (MkFixed s)) = + digits2 h <> char7 ':' <> + digits2 m <> char7 ':' <> + digits2 (fromInteger real) <> buildFrac (fromInteger frac) + where + (real,frac) = s `quotRem` pico + + buildFrac :: Int64 -> Builder + buildFrac 0 = mempty + buildFrac i = char7 '.' <> case i `quotRem` micro of + (hi, 0) -> buildFrac6 hi + (hi, lo) -> digits6 hi <> buildFrac6 lo + + buildFrac6 :: Int64 -> Builder + buildFrac6 i = case i `quotRem` milli of + (hi, 0) -> digits3 hi + (hi, lo) -> digits3 hi <> digits3 lo + + digits6 i = case i `quotRem` milli of + (hi, lo) -> digits3 hi <> digits3 lo + + digits3 i = digit64 a <> digit64 b <> digit64 c + where + (ab, c) = i `quotRem` 10 + (a, b) = ab `quotRem` 10 + + pico = 1000000000000 -- number of picoseconds in 1 second + micro = 1000000 -- number of microseconds in 1 second + milli = 1000 -- number of milliseconds in 1 second +{-# INLINE buildTimeOfDay #-} + +buildTimeZone :: Local.TimeZone -> Builder +buildTimeZone (Local.TimeZone off _ _) + | off == 0 = char7 'Z' + | otherwise = char7 s <> digits2 h <> char7 ':' <> digits2 m + where !s = if off < 0 then '-' else '+' + (h,m) = abs off `quotRem` 60 +{-# INLINE buildTimeZone #-} + +dayTime :: Day -> TimeOfDay -> Builder +dayTime d t = buildDay d <> char7 'T' <> buildTimeOfDay t +{-# INLINE dayTime #-} + +buildUTCTime :: UTCTime -> B.Builder +buildUTCTime (UTCTime d s) = dayTime d (Local.timeToTimeOfDay s) <> char7 'Z' +{-# INLINE buildUTCTime #-} + +buildLocalTime :: Local.LocalTime -> Builder +buildLocalTime (Local.LocalTime d t) = dayTime d t +{-# INLINE buildLocalTime #-} + +buildZonedTime :: Local.ZonedTime -> Builder +buildZonedTime (Local.ZonedTime t z) = buildLocalTime t <> buildTimeZone z +{-# INLINE buildZonedTime #-} + +------------------------------------------------------------------------------- +-- Utilities +------------------------------------------------------------------------------- + +digits2 :: Int -> Builder +digits2 a = digit hi <> digit lo + where (hi,lo) = a `quotRem` 10 + +digit :: Int -> Builder +digit x = char7 (chr (x + 48)) + +digit64 :: Int64 -> Builder +digit64 = digit . fromIntegral + +char7 :: Char -> Builder +char7 = B.singleton diff --git a/text-iso8601/tests/text-iso8601-tests.hs b/text-iso8601/tests/text-iso8601-tests.hs new file mode 100644 index 000000000..e79ed3b7f --- /dev/null +++ b/text-iso8601/tests/text-iso8601-tests.hs @@ -0,0 +1,103 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +module Main (main) where + +import Data.Functor.Classes (liftEq) +import Data.Proxy (Proxy (..)) +import Data.Text (Text) +import Data.Text.Lazy.Builder (Builder, toLazyText) +import Data.Time.LocalTime.Compat (TimeZone (..), ZonedTime (..)) +import Data.Typeable (Typeable, typeRep) +import Test.QuickCheck (Arbitrary, counterexample, + property) +import Test.QuickCheck.Instances () +import Test.Tasty (TestTree, defaultMain, testGroup) +import Test.Tasty.HUnit (assertFailure, testCase) +import Test.Tasty.QuickCheck (testProperty) + +import qualified Data.Text as T +import qualified Data.Text.Lazy as LT +import qualified Data.Time.FromText as T +import qualified Data.Time.ToText as T + +main :: IO () +main = defaultMain $ testGroup "text-iso8601" + [ testGroup "roundtrip" + [ roundtrip (==) T.buildDay T.parseDay + , roundtrip (==) T.buildLocalTime T.parseLocalTime + , roundtrip eqTZ T.buildTimeZone T.parseTimeZone + , roundtrip (==) T.buildUTCTime T.parseUTCTime + , roundtrip eqZT T.buildZonedTime T.parseZonedTime + , roundtrip (==) T.buildTimeOfDay T.parseTimeOfDay + , roundtrip (==) T.buildYear T.parseYear + , roundtrip (==) T.buildMonth T.parseMonth + , roundtrip (==) T.buildQuarter T.parseQuarter + , roundtrip (==) T.buildQuarterOfYear T.parseQuarterOfYear + ] + + , testGroup "accepts" + -- we accept space instead of T + -- RFC3339 has a note suggesting allowing this. + [ accepts T.parseUTCTime "2023-06-09 02:35:33Z" + + -- 60 second is always accepted + , accepts T.parseUTCTime "2023-06-09T02:35:60Z" + + -- examples from RFC3339 + , accepts T.parseUTCTime "1985-04-12T23:20:50.52Z" + , accepts T.parseUTCTime "1996-12-19T16:39:57-08:00" + , accepts T.parseUTCTime "1990-12-31T23:59:60Z" + , accepts T.parseUTCTime "1990-12-31T15:59:60-08:00" + , accepts T.parseUTCTime "1937-01-01T12:00:27.87+00:20" + + -- we accept time without seconds + , accepts T.parseUTCTime "1937-01-01 12:00Z" + , accepts T.parseLocalTime "1937-01-01 12:00" + + -- ISO8601 allows various offsets, while RFC3339 only +-HH:MM + , accepts T.parseUTCTime "1990-12-31T15:59:60-0800" -- no colon + , accepts T.parseUTCTime "1990-12-31T15:59:60-08" -- just hour + ] + + , testGroup "rejected" + -- https://github.com/haskell/aeson/issues/1033 + [ rejects T.parseUTCTime "2023-06-09T02:35:33 Z" + + -- Y2K years + , rejects T.parseDay "99-12-12" + + -- we don't accept lowercase T or Z + -- RFC3339 says we MAY limit, i.e. requiring they should be uppercase. + , rejects T.parseUTCTime "2023-06-09T02:35:33z" + , rejects T.parseUTCTime "2023-06-09t02:35:33Z" + ] + ] + +eqTZ :: TimeZone -> TimeZone -> Bool +eqTZ a b = timeZoneMinutes a == timeZoneMinutes b + +eqZT :: ZonedTime -> ZonedTime -> Bool +eqZT (ZonedTime lt tz) (ZonedTime lt' tz') = + lt == lt' && eqTZ tz tz' + +roundtrip + :: forall a. (Typeable a, Arbitrary a, Show a) + => (a -> a -> Bool) -> (a -> Builder) -> (Text -> Either String a) -> TestTree +roundtrip eq build parse = testProperty (show (typeRep (Proxy :: Proxy a))) $ \x -> + let lt = toLazyText (build x) + y = parse (LT.toStrict lt) + in counterexample (LT.unpack lt) $ + counterexample (show y) $ + property (liftEq eq y (Right x)) + +rejects :: forall a. (Typeable a, Show a) => (Text -> Either String a) -> String -> TestTree +rejects parse inp = testCase (show (typeRep (Proxy :: Proxy a)) ++ " accepts " ++ show inp) $ do + case parse (T.pack inp) of + Left _ -> return () + Right a -> assertFailure $ "Unexpectedly accepted: " ++ show a + +accepts :: forall a. (Typeable a, Show a) => (Text -> Either String a) -> String -> TestTree +accepts parse inp = testCase (show (typeRep (Proxy :: Proxy a)) ++ " rejects " ++ show inp) $ do + case parse (T.pack inp) of + Left err -> assertFailure $ "Unexpectedly rejected: " ++ err + Right _ -> return () diff --git a/text-iso8601/text-iso8601.cabal b/text-iso8601/text-iso8601.cabal new file mode 100644 index 000000000..59e2b79ae --- /dev/null +++ b/text-iso8601/text-iso8601.cabal @@ -0,0 +1,89 @@ +cabal-version: 1.12 +name: text-iso8601 +version: 0.1 +synopsis: Converting time to and from ISO 8601 text. +description: + Converting time to and from IS0 8601 text. + Specifically the [RFC3339](https://datatracker.ietf.org/doc/html/rfc3339) profile. + +license: BSD3 +license-file: LICENSE +category: Parsing +copyright: Oleg Grenrus +author: Oleg Grenrus +maintainer: + Oleg Grenrus + +homepage: https://github.com/haskell/aeson +bug-reports: https://github.com/haskell/aeson/issues +build-type: Simple +tested-with: + GHC ==8.2.2 + || ==8.4.4 + || ==8.6.5 + || ==8.8.4 + || ==8.10.7 + || ==9.0.2 + || ==9.2.8 + || ==9.4.5 + || ==9.6.2 + +extra-source-files: changelog.md + +source-repository head + type: git + location: git://github.com/haskell/aeson.git + subdir: text-iso8601 + +library + default-language: Haskell2010 + hs-source-dirs: src + ghc-options: -Wall + exposed-modules: + Data.Time.FromText + Data.Time.ToText + + build-depends: + base >=4.10.0.0 && <5 + , integer-conversion >=0.1 && <0.2 + , text >=1.2.3.0 && <1.3.0.0 || >=2.0 && <2.1 + , time >=1.6.0.1 && <1.13 + , time-compat >=1.9.4 && <1.10 + +test-suite text-iso8601-tests + default-language: Haskell2010 + hs-source-dirs: tests + type: exitcode-stdio-1.0 + main-is: text-iso8601-tests.hs + ghc-options: -Wall + build-depends: + base + , text + , text-iso8601 + , time-compat + + -- test dependencies + build-depends: + QuickCheck >=2.14.3 && <2.15 + , quickcheck-instances >=0.3.29.1 && <0.4 + , tasty >=1.4.3 && <1.5 + , tasty-hunit >=0.10.0.3 && <0.11 + , tasty-quickcheck >=0.10.2 && <0.11 + +benchmark text-iso8601-bench + default-language: Haskell2010 + hs-source-dirs: bench + type: exitcode-stdio-1.0 + main-is: text-iso8601-bench.hs + ghc-options: -Wall + build-depends: + base + , text + , text-iso8601 + , time-compat + + -- bench dependencies + build-depends: + attoparsec >=0.14.4 && <0.15 + , attoparsec-iso8601 >=1.1.0.1 && <1.2 + , tasty-bench >=0.3.4 && <0.4