From e324a09fb83501830c0678eb8a3194125250c3c3 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 17 Jun 2024 16:59:02 +0200 Subject: [PATCH] core: Improve Case & Unfold --- unicode-data/lib/Unicode/Char/Case.hs | 5 ++++- unicode-data/lib/Unicode/Char/Case/Compat.hs | 11 ++++++----- unicode-data/lib/Unicode/Internal/Unfold.hs | 15 +++++++++------ 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/unicode-data/lib/Unicode/Char/Case.hs b/unicode-data/lib/Unicode/Char/Case.hs index c23b3719..3580fb52 100644 --- a/unicode-data/lib/Unicode/Char/Case.hs +++ b/unicode-data/lib/Unicode/Char/Case.hs @@ -249,4 +249,7 @@ step = \case where -- Mask for a single Unicode code point: (1 << 21) - 1 mask = 0x1fffff - cp = fromIntegral (s .&. mask) + -- [NOTE] As of GHC 9.4, Int64 is represented internally by Int64#, + -- so the previous code `fromIntegral s .&. mask` leads to + -- unefficient generated code. + cp = fromIntegral s .&. mask diff --git a/unicode-data/lib/Unicode/Char/Case/Compat.hs b/unicode-data/lib/Unicode/Char/Case/Compat.hs index f01fa92e..0801dcc2 100644 --- a/unicode-data/lib/Unicode/Char/Case/Compat.hs +++ b/unicode-data/lib/Unicode/Char/Case/Compat.hs @@ -31,8 +31,9 @@ import qualified Unicode.Internal.Char.UnicodeData.SimpleUpperCaseMapping as C -- Title case is used by a small number of letter ligatures like the -- single-character form of /Lj/. -- --- It matches characters with general category 'UppercaseLetter' and --- 'TitlecaseLetter'. +-- It matches characters with general category +-- 'Unicode.Char.General.UppercaseLetter' and +-- 'Unicode.Char.General.TitlecaseLetter'. -- -- See: 'Unicode.Char.Case.isUpperCase' for the /full upper/ case predicate. -- @@ -48,11 +49,11 @@ isUpper c = UC.UppercaseLetter -> True UC.TitlecaseLetter -> True _ -> False - where cp = ord c + where !cp = ord c -- | Selects lower-case alphabetic Unicode characters (letters). -- --- It matches characters with general category 'LowercaseLetter'. +-- It matches characters with general category 'Unicode.Char.Case.LowercaseLetter'. -- -- See: 'Unicode.Char.Case.isLowerCase' for the /full/ lower case predicate. -- @@ -67,7 +68,7 @@ isLower c = case UC.generalCategoryPlanes0To3 cp of UC.LowercaseLetter -> True _ -> False - where cp = ord c + where !cp = ord c -- | Convert a letter to the corresponding upper-case letter, if any. -- Any other character is returned unchanged. diff --git a/unicode-data/lib/Unicode/Internal/Unfold.hs b/unicode-data/lib/Unicode/Internal/Unfold.hs index c8ae6268..b1629035 100644 --- a/unicode-data/lib/Unicode/Internal/Unfold.hs +++ b/unicode-data/lib/Unicode/Internal/Unfold.hs @@ -1,6 +1,5 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE ExistentialQuantification #-} -{-# LANGUAGE LambdaCase #-} -- | -- Module : Unicode.Internal.Unfold @@ -19,6 +18,8 @@ module Unicode.Internal.Unfold , toList ) where +import GHC.Base (build) + -- | An @Unfold a b@ is a generator of a stream of values of type @b@ from a -- seed of type @a@. -- @@ -57,8 +58,10 @@ toList :: Unfold a a -> a -> [a] toList (Unfold step inject) input = case inject input of Stop -> [input] - Yield b s -> b : go (step s) - where - go = \case - Yield b s -> let !s' = step s in b : go s' - Stop -> [] + Yield b s0 -> b : build + ( \c n -> + let go s = case step s of + Yield a s' -> a `c` go s' + Stop -> n + in go s0 + )