From 093f2d69f3c138cc29fbbb90e52daad2b312c749 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 17 Jun 2024 17:28:35 +0200 Subject: [PATCH] core: Improve Blocks --- ucd2haskell/exe/UCD2Haskell/Modules/Blocks.hs | 57 +- .../lib/Unicode/Char/General/Blocks.hs | 38 +- unicode-data/lib/Unicode/Internal/Bits.hs | 13 +- .../lib/Unicode/Internal/Char/Blocks.hs | 687 +++++++++--------- unicode-data/test/Unicode/CharSpec.hs | 4 + unicode-data/unicode-data.cabal | 3 + 6 files changed, 424 insertions(+), 378 deletions(-) diff --git a/ucd2haskell/exe/UCD2Haskell/Modules/Blocks.hs b/ucd2haskell/exe/UCD2Haskell/Modules/Blocks.hs index f465aa6b..44962993 100644 --- a/ucd2haskell/exe/UCD2Haskell/Modules/Blocks.hs +++ b/ucd2haskell/exe/UCD2Haskell/Modules/Blocks.hs @@ -23,7 +23,8 @@ recipe = ModuleRecipe genBlocksModule data Acc = Acc - { blocks :: ![BB.Builder] + { count :: !Word + , blocks :: ![BB.Builder] , defs :: ![BB.Builder] , ranges :: ![(Int, Int)] } @@ -33,10 +34,11 @@ genBlocksModule moduleName = Fold step initial done done Acc{..} = let ranges' = reverse ranges in unlinesBB [ apacheLicense 2022 moduleName + , "{-# LANGUAGE LambdaCase #-}" , "{-# OPTIONS_HADDOCK hide #-}" , "" , "module " <> moduleName - , "(Block(..), BlockDefinition(..), block, blockDefinition)" + , "(Block(..), block, blockDefinition)" , "where" , "" , "import Data.Ix (Ix)" @@ -53,25 +55,25 @@ genBlocksModule moduleName = Fold step initial done , " = " <> mconcat (L.intersperse "\n | " (reverse blocks)) , " deriving (Enum, Bounded, Eq, Ord, Ix, Show)" , "" - , "-- | Block definition: range and name." - , "--" - , "-- @since 0.3.1" - , "data BlockDefinition = BlockDefinition" - , " { blockRange :: !(Int, Int) -- ^ Range" - , " , blockName :: !String -- ^ Name" - , " } deriving (Eq, Ord, Show)" - , "" , "-- | Block definition" , "--" + , "-- Undefined for values greater than " <> BB.wordDec (pred count) <> "." + , "--" + , "-- Returned value:" + , "--" + , "-- * Lower bound" + , "-- * Upper bound" + , "-- * Name (null terminated ASCII string)" + , "--" , "-- @since 0.3.1" - , "blockDefinition :: Block -> BlockDefinition" - , "blockDefinition b = case b of" + , "blockDefinition :: Int# -> (# Int#, Int#, Addr# #)" + , "blockDefinition = \\case" , mconcat (reverse defs) - , "-- | Character block, if defined." + , "-- | Character block, if defined, else -1." , "--" , "-- @since 0.3.1" - , "block :: Char -> Maybe Int" - , "block (C# c#) = getBlock 0# " <> BB.intDec (length ranges - 1) <> BB.char7 '#' + , "block :: Char# -> Int#" + , "block c# = getBlock 0# " <> BB.intDec (length ranges - 1) <> BB.char7 '#' , " where" , " -- [NOTE] Encoding" , " -- A range is encoded as two LE Word32:" @@ -83,7 +85,7 @@ genBlocksModule moduleName = Fold step initial done , "" , " -- Binary search" , " getBlock l# u# = if isTrue# (l# ># u#)" - , " then Nothing" + , " then -1#" , " else" , " let k# = l# +# uncheckedIShiftRL# (u# -# l#) 1#" , " j# = k# `uncheckedIShiftL#` 1#" @@ -99,7 +101,7 @@ genBlocksModule moduleName = Fold step initial done , " then getBlock l# (k# -# 1#)" , " -- cp in block: get block index" , " else let block# = cpL0# `uncheckedShiftRL#` 21#" - , " in Just (I# (word2Int# block#))" + , " in word2Int# block#" , "" , " getRawCodePoint# = lookupWord32# ranges#" , "" @@ -111,7 +113,7 @@ genBlocksModule moduleName = Fold step initial done , " \"" <> enumMapToAddrLiteral 4 0xff (mkRanges ranges') "\"#" ] - initial = Acc mempty mempty mempty + initial = Acc 0 mempty mempty mempty step Acc{..} (Prop.Entry range blockName) = case range of U.SingleChar c -> error ("genBlocksModule: expected range, got: " <> show c) @@ -120,8 +122,9 @@ genBlocksModule moduleName = Fold step initial done blockRange = (ord start, ord end) blockName' = BB.shortByteString blockName in Acc - { blocks = mkBlockConstructor blockID blockName' blockRange : blocks - , defs = mkBlockDef blockID blockName' blockRange : defs + { count = succ count + , blocks = mkBlockConstructor blockID blockName' blockRange : blocks + , defs = mkBlockDef count blockName' blockRange : defs , ranges = blockRange : ranges } mkBlockConstructor blockID blockName (l, u) = mconcat @@ -135,16 +138,12 @@ genBlocksModule moduleName = Fold step initial done , "." ] - mkBlockDef blockID blockName (l, u) = mconcat + mkBlockDef blockIndex blockName (l, u) = mconcat [ " " - , blockID - , " -> BlockDefinition (0x" - , showPaddedHexB l - , ", 0x" - , showPaddedHexB u - , ") \"" - , blockName - , "\"\n" + , if u == ord maxBound then "_ " else BB.wordDec blockIndex <> "#" + , " -> (# 0x", showPaddedHexB l, "#, 0x", showPaddedHexB u, "#, \"" + , blockName -- NOTE: name is ASCII + , "\\0\"# #)\n" ] -- [NOTE] Encoding: a range is encoded as two LE Word32: diff --git a/unicode-data/lib/Unicode/Char/General/Blocks.hs b/unicode-data/lib/Unicode/Char/General/Blocks.hs index 51ec7855..437e258e 100644 --- a/unicode-data/lib/Unicode/Char/General/Blocks.hs +++ b/unicode-data/lib/Unicode/Char/General/Blocks.hs @@ -1,3 +1,7 @@ +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE DeriveGeneric #-} + -- | -- Module : Unicode.Char.General.Blocks -- Copyright : (c) 2020 Composewell Technologies and Contributors @@ -10,14 +14,19 @@ -- @since 0.3.1 module Unicode.Char.General.Blocks - ( B.Block(..) - , B.BlockDefinition(..) + ( -- * Blocks + B.Block(..) , block - , B.blockDefinition + -- * Blocks definitions + , BlockDefinition(..) + , blockDefinition ) where +import GHC.Exts (Char (..), Int (..), dataToTag#, tagToEnum#) + +import Unicode.Internal.Bits (unpackCString#) import qualified Unicode.Internal.Char.Blocks as B -- | Character [block](https://www.unicode.org/glossary/#block), if defined. @@ -25,4 +34,25 @@ import qualified Unicode.Internal.Char.Blocks as B -- @since 0.3.1 {-# INLINE block #-} block :: Char -> Maybe B.Block -block = fmap toEnum . B.block +block (C# c#) = case B.block c# of + -1# -> Nothing + b# -> Just (tagToEnum# b# :: B.Block) + +-- | Block definition: range and name. +-- +-- @since 0.3.1 +data BlockDefinition = BlockDefinition + { blockRange :: !(Int, Int) -- ^ Range + , blockName :: !String -- ^ Name + } deriving (Eq, Ord, Show) + +-- | Block definition +-- +-- @since 0.3.1 +blockDefinition :: B.Block -> BlockDefinition +blockDefinition b = case B.blockDefinition (dataToTag# b) of + (# lower#, upper#, name# #) -> BlockDefinition range name + where + !range = (I# lower#, I# upper#) + -- Note: names are ASCII. See Unicode Standard 15.0.0, section 3.4. + !name = unpackCString# name# diff --git a/unicode-data/lib/Unicode/Internal/Bits.hs b/unicode-data/lib/Unicode/Internal/Bits.hs index 5bc8ac10..97efa4a0 100644 --- a/unicode-data/lib/Unicode/Internal/Bits.hs +++ b/unicode-data/lib/Unicode/Internal/Bits.hs @@ -12,10 +12,13 @@ -- Fast, static bitmap lookup utilities module Unicode.Internal.Bits - ( lookupBit64, + ( -- * Bitmap lookup + lookupBit64, lookupWord8AsInt, lookupWord16AsInt, - lookupWord32# + lookupWord32#, + -- * CString + unpackCString# ) where #include "MachDeps.h" @@ -36,6 +39,12 @@ import GHC.Exts byteSwap16#, byteSwap32#) #endif +#if MIN_VERSION_base(4,15,0) +import GHC.Exts (unpackCString#) +#else +import GHC.CString (unpackCString#) +#endif + -- | @lookup64 addr index@ looks up the bit stored at bit index @index@ using a -- bitmap starting at the address @addr@. Looks up the 64-bit word containing -- the bit and then the bit in that word. The caller must make sure that the diff --git a/unicode-data/lib/Unicode/Internal/Char/Blocks.hs b/unicode-data/lib/Unicode/Internal/Char/Blocks.hs index 2936e82c..c332bbc9 100644 --- a/unicode-data/lib/Unicode/Internal/Char/Blocks.hs +++ b/unicode-data/lib/Unicode/Internal/Char/Blocks.hs @@ -6,10 +6,11 @@ -- Maintainer : streamly@composewell.com -- Stability : experimental +{-# LANGUAGE LambdaCase #-} {-# OPTIONS_HADDOCK hide #-} module Unicode.Internal.Char.Blocks -(Block(..), BlockDefinition(..), block, blockDefinition) +(Block(..), block, blockDefinition) where import Data.Ix (Ix) @@ -352,352 +353,352 @@ data Block | SupplementaryPrivateUseAreaB -- ^ @U+100000..U+10FFFF@: Supplementary Private Use Area-B. deriving (Enum, Bounded, Eq, Ord, Ix, Show) --- | Block definition: range and name. --- --- @since 0.3.1 -data BlockDefinition = BlockDefinition - { blockRange :: !(Int, Int) -- ^ Range - , blockName :: !String -- ^ Name - } deriving (Eq, Ord, Show) - -- | Block definition -- +-- Undefined for values greater than 326. +-- +-- Returned value: +-- +-- * Lower bound +-- * Upper bound +-- * Name (null terminated ASCII string) +-- -- @since 0.3.1 -blockDefinition :: Block -> BlockDefinition -blockDefinition b = case b of - BasicLatin -> BlockDefinition (0x0000, 0x007f) "Basic Latin" - Latin1Supplement -> BlockDefinition (0x0080, 0x00ff) "Latin-1 Supplement" - LatinExtendedA -> BlockDefinition (0x0100, 0x017f) "Latin Extended-A" - LatinExtendedB -> BlockDefinition (0x0180, 0x024f) "Latin Extended-B" - IPAExtensions -> BlockDefinition (0x0250, 0x02af) "IPA Extensions" - SpacingModifierLetters -> BlockDefinition (0x02b0, 0x02ff) "Spacing Modifier Letters" - CombiningDiacriticalMarks -> BlockDefinition (0x0300, 0x036f) "Combining Diacritical Marks" - GreekAndCoptic -> BlockDefinition (0x0370, 0x03ff) "Greek and Coptic" - Cyrillic -> BlockDefinition (0x0400, 0x04ff) "Cyrillic" - CyrillicSupplement -> BlockDefinition (0x0500, 0x052f) "Cyrillic Supplement" - Armenian -> BlockDefinition (0x0530, 0x058f) "Armenian" - Hebrew -> BlockDefinition (0x0590, 0x05ff) "Hebrew" - Arabic -> BlockDefinition (0x0600, 0x06ff) "Arabic" - Syriac -> BlockDefinition (0x0700, 0x074f) "Syriac" - ArabicSupplement -> BlockDefinition (0x0750, 0x077f) "Arabic Supplement" - Thaana -> BlockDefinition (0x0780, 0x07bf) "Thaana" - NKo -> BlockDefinition (0x07c0, 0x07ff) "NKo" - Samaritan -> BlockDefinition (0x0800, 0x083f) "Samaritan" - Mandaic -> BlockDefinition (0x0840, 0x085f) "Mandaic" - SyriacSupplement -> BlockDefinition (0x0860, 0x086f) "Syriac Supplement" - ArabicExtendedB -> BlockDefinition (0x0870, 0x089f) "Arabic Extended-B" - ArabicExtendedA -> BlockDefinition (0x08a0, 0x08ff) "Arabic Extended-A" - Devanagari -> BlockDefinition (0x0900, 0x097f) "Devanagari" - Bengali -> BlockDefinition (0x0980, 0x09ff) "Bengali" - Gurmukhi -> BlockDefinition (0x0a00, 0x0a7f) "Gurmukhi" - Gujarati -> BlockDefinition (0x0a80, 0x0aff) "Gujarati" - Oriya -> BlockDefinition (0x0b00, 0x0b7f) "Oriya" - Tamil -> BlockDefinition (0x0b80, 0x0bff) "Tamil" - Telugu -> BlockDefinition (0x0c00, 0x0c7f) "Telugu" - Kannada -> BlockDefinition (0x0c80, 0x0cff) "Kannada" - Malayalam -> BlockDefinition (0x0d00, 0x0d7f) "Malayalam" - Sinhala -> BlockDefinition (0x0d80, 0x0dff) "Sinhala" - Thai -> BlockDefinition (0x0e00, 0x0e7f) "Thai" - Lao -> BlockDefinition (0x0e80, 0x0eff) "Lao" - Tibetan -> BlockDefinition (0x0f00, 0x0fff) "Tibetan" - Myanmar -> BlockDefinition (0x1000, 0x109f) "Myanmar" - Georgian -> BlockDefinition (0x10a0, 0x10ff) "Georgian" - HangulJamo -> BlockDefinition (0x1100, 0x11ff) "Hangul Jamo" - Ethiopic -> BlockDefinition (0x1200, 0x137f) "Ethiopic" - EthiopicSupplement -> BlockDefinition (0x1380, 0x139f) "Ethiopic Supplement" - Cherokee -> BlockDefinition (0x13a0, 0x13ff) "Cherokee" - UnifiedCanadianAboriginalSyllabics -> BlockDefinition (0x1400, 0x167f) "Unified Canadian Aboriginal Syllabics" - Ogham -> BlockDefinition (0x1680, 0x169f) "Ogham" - Runic -> BlockDefinition (0x16a0, 0x16ff) "Runic" - Tagalog -> BlockDefinition (0x1700, 0x171f) "Tagalog" - Hanunoo -> BlockDefinition (0x1720, 0x173f) "Hanunoo" - Buhid -> BlockDefinition (0x1740, 0x175f) "Buhid" - Tagbanwa -> BlockDefinition (0x1760, 0x177f) "Tagbanwa" - Khmer -> BlockDefinition (0x1780, 0x17ff) "Khmer" - Mongolian -> BlockDefinition (0x1800, 0x18af) "Mongolian" - UnifiedCanadianAboriginalSyllabicsExtended -> BlockDefinition (0x18b0, 0x18ff) "Unified Canadian Aboriginal Syllabics Extended" - Limbu -> BlockDefinition (0x1900, 0x194f) "Limbu" - TaiLe -> BlockDefinition (0x1950, 0x197f) "Tai Le" - NewTaiLue -> BlockDefinition (0x1980, 0x19df) "New Tai Lue" - KhmerSymbols -> BlockDefinition (0x19e0, 0x19ff) "Khmer Symbols" - Buginese -> BlockDefinition (0x1a00, 0x1a1f) "Buginese" - TaiTham -> BlockDefinition (0x1a20, 0x1aaf) "Tai Tham" - CombiningDiacriticalMarksExtended -> BlockDefinition (0x1ab0, 0x1aff) "Combining Diacritical Marks Extended" - Balinese -> BlockDefinition (0x1b00, 0x1b7f) "Balinese" - Sundanese -> BlockDefinition (0x1b80, 0x1bbf) "Sundanese" - Batak -> BlockDefinition (0x1bc0, 0x1bff) "Batak" - Lepcha -> BlockDefinition (0x1c00, 0x1c4f) "Lepcha" - OlChiki -> BlockDefinition (0x1c50, 0x1c7f) "Ol Chiki" - CyrillicExtendedC -> BlockDefinition (0x1c80, 0x1c8f) "Cyrillic Extended-C" - GeorgianExtended -> BlockDefinition (0x1c90, 0x1cbf) "Georgian Extended" - SundaneseSupplement -> BlockDefinition (0x1cc0, 0x1ccf) "Sundanese Supplement" - VedicExtensions -> BlockDefinition (0x1cd0, 0x1cff) "Vedic Extensions" - PhoneticExtensions -> BlockDefinition (0x1d00, 0x1d7f) "Phonetic Extensions" - PhoneticExtensionsSupplement -> BlockDefinition (0x1d80, 0x1dbf) "Phonetic Extensions Supplement" - CombiningDiacriticalMarksSupplement -> BlockDefinition (0x1dc0, 0x1dff) "Combining Diacritical Marks Supplement" - LatinExtendedAdditional -> BlockDefinition (0x1e00, 0x1eff) "Latin Extended Additional" - GreekExtended -> BlockDefinition (0x1f00, 0x1fff) "Greek Extended" - GeneralPunctuation -> BlockDefinition (0x2000, 0x206f) "General Punctuation" - SuperscriptsAndSubscripts -> BlockDefinition (0x2070, 0x209f) "Superscripts and Subscripts" - CurrencySymbols -> BlockDefinition (0x20a0, 0x20cf) "Currency Symbols" - CombiningDiacriticalMarksForSymbols -> BlockDefinition (0x20d0, 0x20ff) "Combining Diacritical Marks for Symbols" - LetterlikeSymbols -> BlockDefinition (0x2100, 0x214f) "Letterlike Symbols" - NumberForms -> BlockDefinition (0x2150, 0x218f) "Number Forms" - Arrows -> BlockDefinition (0x2190, 0x21ff) "Arrows" - MathematicalOperators -> BlockDefinition (0x2200, 0x22ff) "Mathematical Operators" - MiscellaneousTechnical -> BlockDefinition (0x2300, 0x23ff) "Miscellaneous Technical" - ControlPictures -> BlockDefinition (0x2400, 0x243f) "Control Pictures" - OpticalCharacterRecognition -> BlockDefinition (0x2440, 0x245f) "Optical Character Recognition" - EnclosedAlphanumerics -> BlockDefinition (0x2460, 0x24ff) "Enclosed Alphanumerics" - BoxDrawing -> BlockDefinition (0x2500, 0x257f) "Box Drawing" - BlockElements -> BlockDefinition (0x2580, 0x259f) "Block Elements" - GeometricShapes -> BlockDefinition (0x25a0, 0x25ff) "Geometric Shapes" - MiscellaneousSymbols -> BlockDefinition (0x2600, 0x26ff) "Miscellaneous Symbols" - Dingbats -> BlockDefinition (0x2700, 0x27bf) "Dingbats" - MiscellaneousMathematicalSymbolsA -> BlockDefinition (0x27c0, 0x27ef) "Miscellaneous Mathematical Symbols-A" - SupplementalArrowsA -> BlockDefinition (0x27f0, 0x27ff) "Supplemental Arrows-A" - BraillePatterns -> BlockDefinition (0x2800, 0x28ff) "Braille Patterns" - SupplementalArrowsB -> BlockDefinition (0x2900, 0x297f) "Supplemental Arrows-B" - MiscellaneousMathematicalSymbolsB -> BlockDefinition (0x2980, 0x29ff) "Miscellaneous Mathematical Symbols-B" - SupplementalMathematicalOperators -> BlockDefinition (0x2a00, 0x2aff) "Supplemental Mathematical Operators" - MiscellaneousSymbolsAndArrows -> BlockDefinition (0x2b00, 0x2bff) "Miscellaneous Symbols and Arrows" - Glagolitic -> BlockDefinition (0x2c00, 0x2c5f) "Glagolitic" - LatinExtendedC -> BlockDefinition (0x2c60, 0x2c7f) "Latin Extended-C" - Coptic -> BlockDefinition (0x2c80, 0x2cff) "Coptic" - GeorgianSupplement -> BlockDefinition (0x2d00, 0x2d2f) "Georgian Supplement" - Tifinagh -> BlockDefinition (0x2d30, 0x2d7f) "Tifinagh" - EthiopicExtended -> BlockDefinition (0x2d80, 0x2ddf) "Ethiopic Extended" - CyrillicExtendedA -> BlockDefinition (0x2de0, 0x2dff) "Cyrillic Extended-A" - SupplementalPunctuation -> BlockDefinition (0x2e00, 0x2e7f) "Supplemental Punctuation" - CJKRadicalsSupplement -> BlockDefinition (0x2e80, 0x2eff) "CJK Radicals Supplement" - KangxiRadicals -> BlockDefinition (0x2f00, 0x2fdf) "Kangxi Radicals" - IdeographicDescriptionCharacters -> BlockDefinition (0x2ff0, 0x2fff) "Ideographic Description Characters" - CJKSymbolsAndPunctuation -> BlockDefinition (0x3000, 0x303f) "CJK Symbols and Punctuation" - Hiragana -> BlockDefinition (0x3040, 0x309f) "Hiragana" - Katakana -> BlockDefinition (0x30a0, 0x30ff) "Katakana" - Bopomofo -> BlockDefinition (0x3100, 0x312f) "Bopomofo" - HangulCompatibilityJamo -> BlockDefinition (0x3130, 0x318f) "Hangul Compatibility Jamo" - Kanbun -> BlockDefinition (0x3190, 0x319f) "Kanbun" - BopomofoExtended -> BlockDefinition (0x31a0, 0x31bf) "Bopomofo Extended" - CJKStrokes -> BlockDefinition (0x31c0, 0x31ef) "CJK Strokes" - KatakanaPhoneticExtensions -> BlockDefinition (0x31f0, 0x31ff) "Katakana Phonetic Extensions" - EnclosedCJKLettersAndMonths -> BlockDefinition (0x3200, 0x32ff) "Enclosed CJK Letters and Months" - CJKCompatibility -> BlockDefinition (0x3300, 0x33ff) "CJK Compatibility" - CJKUnifiedIdeographsExtensionA -> BlockDefinition (0x3400, 0x4dbf) "CJK Unified Ideographs Extension A" - YijingHexagramSymbols -> BlockDefinition (0x4dc0, 0x4dff) "Yijing Hexagram Symbols" - CJKUnifiedIdeographs -> BlockDefinition (0x4e00, 0x9fff) "CJK Unified Ideographs" - YiSyllables -> BlockDefinition (0xa000, 0xa48f) "Yi Syllables" - YiRadicals -> BlockDefinition (0xa490, 0xa4cf) "Yi Radicals" - Lisu -> BlockDefinition (0xa4d0, 0xa4ff) "Lisu" - Vai -> BlockDefinition (0xa500, 0xa63f) "Vai" - CyrillicExtendedB -> BlockDefinition (0xa640, 0xa69f) "Cyrillic Extended-B" - Bamum -> BlockDefinition (0xa6a0, 0xa6ff) "Bamum" - ModifierToneLetters -> BlockDefinition (0xa700, 0xa71f) "Modifier Tone Letters" - LatinExtendedD -> BlockDefinition (0xa720, 0xa7ff) "Latin Extended-D" - SylotiNagri -> BlockDefinition (0xa800, 0xa82f) "Syloti Nagri" - CommonIndicNumberForms -> BlockDefinition (0xa830, 0xa83f) "Common Indic Number Forms" - PhagsPa -> BlockDefinition (0xa840, 0xa87f) "Phags-pa" - Saurashtra -> BlockDefinition (0xa880, 0xa8df) "Saurashtra" - DevanagariExtended -> BlockDefinition (0xa8e0, 0xa8ff) "Devanagari Extended" - KayahLi -> BlockDefinition (0xa900, 0xa92f) "Kayah Li" - Rejang -> BlockDefinition (0xa930, 0xa95f) "Rejang" - HangulJamoExtendedA -> BlockDefinition (0xa960, 0xa97f) "Hangul Jamo Extended-A" - Javanese -> BlockDefinition (0xa980, 0xa9df) "Javanese" - MyanmarExtendedB -> BlockDefinition (0xa9e0, 0xa9ff) "Myanmar Extended-B" - Cham -> BlockDefinition (0xaa00, 0xaa5f) "Cham" - MyanmarExtendedA -> BlockDefinition (0xaa60, 0xaa7f) "Myanmar Extended-A" - TaiViet -> BlockDefinition (0xaa80, 0xaadf) "Tai Viet" - MeeteiMayekExtensions -> BlockDefinition (0xaae0, 0xaaff) "Meetei Mayek Extensions" - EthiopicExtendedA -> BlockDefinition (0xab00, 0xab2f) "Ethiopic Extended-A" - LatinExtendedE -> BlockDefinition (0xab30, 0xab6f) "Latin Extended-E" - CherokeeSupplement -> BlockDefinition (0xab70, 0xabbf) "Cherokee Supplement" - MeeteiMayek -> BlockDefinition (0xabc0, 0xabff) "Meetei Mayek" - HangulSyllables -> BlockDefinition (0xac00, 0xd7af) "Hangul Syllables" - HangulJamoExtendedB -> BlockDefinition (0xd7b0, 0xd7ff) "Hangul Jamo Extended-B" - HighSurrogates -> BlockDefinition (0xd800, 0xdb7f) "High Surrogates" - HighPrivateUseSurrogates -> BlockDefinition (0xdb80, 0xdbff) "High Private Use Surrogates" - LowSurrogates -> BlockDefinition (0xdc00, 0xdfff) "Low Surrogates" - PrivateUseArea -> BlockDefinition (0xe000, 0xf8ff) "Private Use Area" - CJKCompatibilityIdeographs -> BlockDefinition (0xf900, 0xfaff) "CJK Compatibility Ideographs" - AlphabeticPresentationForms -> BlockDefinition (0xfb00, 0xfb4f) "Alphabetic Presentation Forms" - ArabicPresentationFormsA -> BlockDefinition (0xfb50, 0xfdff) "Arabic Presentation Forms-A" - VariationSelectors -> BlockDefinition (0xfe00, 0xfe0f) "Variation Selectors" - VerticalForms -> BlockDefinition (0xfe10, 0xfe1f) "Vertical Forms" - CombiningHalfMarks -> BlockDefinition (0xfe20, 0xfe2f) "Combining Half Marks" - CJKCompatibilityForms -> BlockDefinition (0xfe30, 0xfe4f) "CJK Compatibility Forms" - SmallFormVariants -> BlockDefinition (0xfe50, 0xfe6f) "Small Form Variants" - ArabicPresentationFormsB -> BlockDefinition (0xfe70, 0xfeff) "Arabic Presentation Forms-B" - HalfwidthAndFullwidthForms -> BlockDefinition (0xff00, 0xffef) "Halfwidth and Fullwidth Forms" - Specials -> BlockDefinition (0xfff0, 0xffff) "Specials" - LinearBSyllabary -> BlockDefinition (0x10000, 0x1007f) "Linear B Syllabary" - LinearBIdeograms -> BlockDefinition (0x10080, 0x100ff) "Linear B Ideograms" - AegeanNumbers -> BlockDefinition (0x10100, 0x1013f) "Aegean Numbers" - AncientGreekNumbers -> BlockDefinition (0x10140, 0x1018f) "Ancient Greek Numbers" - AncientSymbols -> BlockDefinition (0x10190, 0x101cf) "Ancient Symbols" - PhaistosDisc -> BlockDefinition (0x101d0, 0x101ff) "Phaistos Disc" - Lycian -> BlockDefinition (0x10280, 0x1029f) "Lycian" - Carian -> BlockDefinition (0x102a0, 0x102df) "Carian" - CopticEpactNumbers -> BlockDefinition (0x102e0, 0x102ff) "Coptic Epact Numbers" - OldItalic -> BlockDefinition (0x10300, 0x1032f) "Old Italic" - Gothic -> BlockDefinition (0x10330, 0x1034f) "Gothic" - OldPermic -> BlockDefinition (0x10350, 0x1037f) "Old Permic" - Ugaritic -> BlockDefinition (0x10380, 0x1039f) "Ugaritic" - OldPersian -> BlockDefinition (0x103a0, 0x103df) "Old Persian" - Deseret -> BlockDefinition (0x10400, 0x1044f) "Deseret" - Shavian -> BlockDefinition (0x10450, 0x1047f) "Shavian" - Osmanya -> BlockDefinition (0x10480, 0x104af) "Osmanya" - Osage -> BlockDefinition (0x104b0, 0x104ff) "Osage" - Elbasan -> BlockDefinition (0x10500, 0x1052f) "Elbasan" - CaucasianAlbanian -> BlockDefinition (0x10530, 0x1056f) "Caucasian Albanian" - Vithkuqi -> BlockDefinition (0x10570, 0x105bf) "Vithkuqi" - LinearA -> BlockDefinition (0x10600, 0x1077f) "Linear A" - LatinExtendedF -> BlockDefinition (0x10780, 0x107bf) "Latin Extended-F" - CypriotSyllabary -> BlockDefinition (0x10800, 0x1083f) "Cypriot Syllabary" - ImperialAramaic -> BlockDefinition (0x10840, 0x1085f) "Imperial Aramaic" - Palmyrene -> BlockDefinition (0x10860, 0x1087f) "Palmyrene" - Nabataean -> BlockDefinition (0x10880, 0x108af) "Nabataean" - Hatran -> BlockDefinition (0x108e0, 0x108ff) "Hatran" - Phoenician -> BlockDefinition (0x10900, 0x1091f) "Phoenician" - Lydian -> BlockDefinition (0x10920, 0x1093f) "Lydian" - MeroiticHieroglyphs -> BlockDefinition (0x10980, 0x1099f) "Meroitic Hieroglyphs" - MeroiticCursive -> BlockDefinition (0x109a0, 0x109ff) "Meroitic Cursive" - Kharoshthi -> BlockDefinition (0x10a00, 0x10a5f) "Kharoshthi" - OldSouthArabian -> BlockDefinition (0x10a60, 0x10a7f) "Old South Arabian" - OldNorthArabian -> BlockDefinition (0x10a80, 0x10a9f) "Old North Arabian" - Manichaean -> BlockDefinition (0x10ac0, 0x10aff) "Manichaean" - Avestan -> BlockDefinition (0x10b00, 0x10b3f) "Avestan" - InscriptionalParthian -> BlockDefinition (0x10b40, 0x10b5f) "Inscriptional Parthian" - InscriptionalPahlavi -> BlockDefinition (0x10b60, 0x10b7f) "Inscriptional Pahlavi" - PsalterPahlavi -> BlockDefinition (0x10b80, 0x10baf) "Psalter Pahlavi" - OldTurkic -> BlockDefinition (0x10c00, 0x10c4f) "Old Turkic" - OldHungarian -> BlockDefinition (0x10c80, 0x10cff) "Old Hungarian" - HanifiRohingya -> BlockDefinition (0x10d00, 0x10d3f) "Hanifi Rohingya" - RumiNumeralSymbols -> BlockDefinition (0x10e60, 0x10e7f) "Rumi Numeral Symbols" - Yezidi -> BlockDefinition (0x10e80, 0x10ebf) "Yezidi" - ArabicExtendedC -> BlockDefinition (0x10ec0, 0x10eff) "Arabic Extended-C" - OldSogdian -> BlockDefinition (0x10f00, 0x10f2f) "Old Sogdian" - Sogdian -> BlockDefinition (0x10f30, 0x10f6f) "Sogdian" - OldUyghur -> BlockDefinition (0x10f70, 0x10faf) "Old Uyghur" - Chorasmian -> BlockDefinition (0x10fb0, 0x10fdf) "Chorasmian" - Elymaic -> BlockDefinition (0x10fe0, 0x10fff) "Elymaic" - Brahmi -> BlockDefinition (0x11000, 0x1107f) "Brahmi" - Kaithi -> BlockDefinition (0x11080, 0x110cf) "Kaithi" - SoraSompeng -> BlockDefinition (0x110d0, 0x110ff) "Sora Sompeng" - Chakma -> BlockDefinition (0x11100, 0x1114f) "Chakma" - Mahajani -> BlockDefinition (0x11150, 0x1117f) "Mahajani" - Sharada -> BlockDefinition (0x11180, 0x111df) "Sharada" - SinhalaArchaicNumbers -> BlockDefinition (0x111e0, 0x111ff) "Sinhala Archaic Numbers" - Khojki -> BlockDefinition (0x11200, 0x1124f) "Khojki" - Multani -> BlockDefinition (0x11280, 0x112af) "Multani" - Khudawadi -> BlockDefinition (0x112b0, 0x112ff) "Khudawadi" - Grantha -> BlockDefinition (0x11300, 0x1137f) "Grantha" - Newa -> BlockDefinition (0x11400, 0x1147f) "Newa" - Tirhuta -> BlockDefinition (0x11480, 0x114df) "Tirhuta" - Siddham -> BlockDefinition (0x11580, 0x115ff) "Siddham" - Modi -> BlockDefinition (0x11600, 0x1165f) "Modi" - MongolianSupplement -> BlockDefinition (0x11660, 0x1167f) "Mongolian Supplement" - Takri -> BlockDefinition (0x11680, 0x116cf) "Takri" - Ahom -> BlockDefinition (0x11700, 0x1174f) "Ahom" - Dogra -> BlockDefinition (0x11800, 0x1184f) "Dogra" - WarangCiti -> BlockDefinition (0x118a0, 0x118ff) "Warang Citi" - DivesAkuru -> BlockDefinition (0x11900, 0x1195f) "Dives Akuru" - Nandinagari -> BlockDefinition (0x119a0, 0x119ff) "Nandinagari" - ZanabazarSquare -> BlockDefinition (0x11a00, 0x11a4f) "Zanabazar Square" - Soyombo -> BlockDefinition (0x11a50, 0x11aaf) "Soyombo" - UnifiedCanadianAboriginalSyllabicsExtendedA -> BlockDefinition (0x11ab0, 0x11abf) "Unified Canadian Aboriginal Syllabics Extended-A" - PauCinHau -> BlockDefinition (0x11ac0, 0x11aff) "Pau Cin Hau" - DevanagariExtendedA -> BlockDefinition (0x11b00, 0x11b5f) "Devanagari Extended-A" - Bhaiksuki -> BlockDefinition (0x11c00, 0x11c6f) "Bhaiksuki" - Marchen -> BlockDefinition (0x11c70, 0x11cbf) "Marchen" - MasaramGondi -> BlockDefinition (0x11d00, 0x11d5f) "Masaram Gondi" - GunjalaGondi -> BlockDefinition (0x11d60, 0x11daf) "Gunjala Gondi" - Makasar -> BlockDefinition (0x11ee0, 0x11eff) "Makasar" - Kawi -> BlockDefinition (0x11f00, 0x11f5f) "Kawi" - LisuSupplement -> BlockDefinition (0x11fb0, 0x11fbf) "Lisu Supplement" - TamilSupplement -> BlockDefinition (0x11fc0, 0x11fff) "Tamil Supplement" - Cuneiform -> BlockDefinition (0x12000, 0x123ff) "Cuneiform" - CuneiformNumbersAndPunctuation -> BlockDefinition (0x12400, 0x1247f) "Cuneiform Numbers and Punctuation" - EarlyDynasticCuneiform -> BlockDefinition (0x12480, 0x1254f) "Early Dynastic Cuneiform" - CyproMinoan -> BlockDefinition (0x12f90, 0x12fff) "Cypro-Minoan" - EgyptianHieroglyphs -> BlockDefinition (0x13000, 0x1342f) "Egyptian Hieroglyphs" - EgyptianHieroglyphFormatControls -> BlockDefinition (0x13430, 0x1345f) "Egyptian Hieroglyph Format Controls" - AnatolianHieroglyphs -> BlockDefinition (0x14400, 0x1467f) "Anatolian Hieroglyphs" - BamumSupplement -> BlockDefinition (0x16800, 0x16a3f) "Bamum Supplement" - Mro -> BlockDefinition (0x16a40, 0x16a6f) "Mro" - Tangsa -> BlockDefinition (0x16a70, 0x16acf) "Tangsa" - BassaVah -> BlockDefinition (0x16ad0, 0x16aff) "Bassa Vah" - PahawhHmong -> BlockDefinition (0x16b00, 0x16b8f) "Pahawh Hmong" - Medefaidrin -> BlockDefinition (0x16e40, 0x16e9f) "Medefaidrin" - Miao -> BlockDefinition (0x16f00, 0x16f9f) "Miao" - IdeographicSymbolsAndPunctuation -> BlockDefinition (0x16fe0, 0x16fff) "Ideographic Symbols and Punctuation" - Tangut -> BlockDefinition (0x17000, 0x187ff) "Tangut" - TangutComponents -> BlockDefinition (0x18800, 0x18aff) "Tangut Components" - KhitanSmallScript -> BlockDefinition (0x18b00, 0x18cff) "Khitan Small Script" - TangutSupplement -> BlockDefinition (0x18d00, 0x18d7f) "Tangut Supplement" - KanaExtendedB -> BlockDefinition (0x1aff0, 0x1afff) "Kana Extended-B" - KanaSupplement -> BlockDefinition (0x1b000, 0x1b0ff) "Kana Supplement" - KanaExtendedA -> BlockDefinition (0x1b100, 0x1b12f) "Kana Extended-A" - SmallKanaExtension -> BlockDefinition (0x1b130, 0x1b16f) "Small Kana Extension" - Nushu -> BlockDefinition (0x1b170, 0x1b2ff) "Nushu" - Duployan -> BlockDefinition (0x1bc00, 0x1bc9f) "Duployan" - ShorthandFormatControls -> BlockDefinition (0x1bca0, 0x1bcaf) "Shorthand Format Controls" - ZnamennyMusicalNotation -> BlockDefinition (0x1cf00, 0x1cfcf) "Znamenny Musical Notation" - ByzantineMusicalSymbols -> BlockDefinition (0x1d000, 0x1d0ff) "Byzantine Musical Symbols" - MusicalSymbols -> BlockDefinition (0x1d100, 0x1d1ff) "Musical Symbols" - AncientGreekMusicalNotation -> BlockDefinition (0x1d200, 0x1d24f) "Ancient Greek Musical Notation" - KaktovikNumerals -> BlockDefinition (0x1d2c0, 0x1d2df) "Kaktovik Numerals" - MayanNumerals -> BlockDefinition (0x1d2e0, 0x1d2ff) "Mayan Numerals" - TaiXuanJingSymbols -> BlockDefinition (0x1d300, 0x1d35f) "Tai Xuan Jing Symbols" - CountingRodNumerals -> BlockDefinition (0x1d360, 0x1d37f) "Counting Rod Numerals" - MathematicalAlphanumericSymbols -> BlockDefinition (0x1d400, 0x1d7ff) "Mathematical Alphanumeric Symbols" - SuttonSignWriting -> BlockDefinition (0x1d800, 0x1daaf) "Sutton SignWriting" - LatinExtendedG -> BlockDefinition (0x1df00, 0x1dfff) "Latin Extended-G" - GlagoliticSupplement -> BlockDefinition (0x1e000, 0x1e02f) "Glagolitic Supplement" - CyrillicExtendedD -> BlockDefinition (0x1e030, 0x1e08f) "Cyrillic Extended-D" - NyiakengPuachueHmong -> BlockDefinition (0x1e100, 0x1e14f) "Nyiakeng Puachue Hmong" - Toto -> BlockDefinition (0x1e290, 0x1e2bf) "Toto" - Wancho -> BlockDefinition (0x1e2c0, 0x1e2ff) "Wancho" - NagMundari -> BlockDefinition (0x1e4d0, 0x1e4ff) "Nag Mundari" - EthiopicExtendedB -> BlockDefinition (0x1e7e0, 0x1e7ff) "Ethiopic Extended-B" - MendeKikakui -> BlockDefinition (0x1e800, 0x1e8df) "Mende Kikakui" - Adlam -> BlockDefinition (0x1e900, 0x1e95f) "Adlam" - IndicSiyaqNumbers -> BlockDefinition (0x1ec70, 0x1ecbf) "Indic Siyaq Numbers" - OttomanSiyaqNumbers -> BlockDefinition (0x1ed00, 0x1ed4f) "Ottoman Siyaq Numbers" - ArabicMathematicalAlphabeticSymbols -> BlockDefinition (0x1ee00, 0x1eeff) "Arabic Mathematical Alphabetic Symbols" - MahjongTiles -> BlockDefinition (0x1f000, 0x1f02f) "Mahjong Tiles" - DominoTiles -> BlockDefinition (0x1f030, 0x1f09f) "Domino Tiles" - PlayingCards -> BlockDefinition (0x1f0a0, 0x1f0ff) "Playing Cards" - EnclosedAlphanumericSupplement -> BlockDefinition (0x1f100, 0x1f1ff) "Enclosed Alphanumeric Supplement" - EnclosedIdeographicSupplement -> BlockDefinition (0x1f200, 0x1f2ff) "Enclosed Ideographic Supplement" - MiscellaneousSymbolsAndPictographs -> BlockDefinition (0x1f300, 0x1f5ff) "Miscellaneous Symbols and Pictographs" - Emoticons -> BlockDefinition (0x1f600, 0x1f64f) "Emoticons" - OrnamentalDingbats -> BlockDefinition (0x1f650, 0x1f67f) "Ornamental Dingbats" - TransportAndMapSymbols -> BlockDefinition (0x1f680, 0x1f6ff) "Transport and Map Symbols" - AlchemicalSymbols -> BlockDefinition (0x1f700, 0x1f77f) "Alchemical Symbols" - GeometricShapesExtended -> BlockDefinition (0x1f780, 0x1f7ff) "Geometric Shapes Extended" - SupplementalArrowsC -> BlockDefinition (0x1f800, 0x1f8ff) "Supplemental Arrows-C" - SupplementalSymbolsAndPictographs -> BlockDefinition (0x1f900, 0x1f9ff) "Supplemental Symbols and Pictographs" - ChessSymbols -> BlockDefinition (0x1fa00, 0x1fa6f) "Chess Symbols" - SymbolsAndPictographsExtendedA -> BlockDefinition (0x1fa70, 0x1faff) "Symbols and Pictographs Extended-A" - SymbolsForLegacyComputing -> BlockDefinition (0x1fb00, 0x1fbff) "Symbols for Legacy Computing" - CJKUnifiedIdeographsExtensionB -> BlockDefinition (0x20000, 0x2a6df) "CJK Unified Ideographs Extension B" - CJKUnifiedIdeographsExtensionC -> BlockDefinition (0x2a700, 0x2b73f) "CJK Unified Ideographs Extension C" - CJKUnifiedIdeographsExtensionD -> BlockDefinition (0x2b740, 0x2b81f) "CJK Unified Ideographs Extension D" - CJKUnifiedIdeographsExtensionE -> BlockDefinition (0x2b820, 0x2ceaf) "CJK Unified Ideographs Extension E" - CJKUnifiedIdeographsExtensionF -> BlockDefinition (0x2ceb0, 0x2ebef) "CJK Unified Ideographs Extension F" - CJKCompatibilityIdeographsSupplement -> BlockDefinition (0x2f800, 0x2fa1f) "CJK Compatibility Ideographs Supplement" - CJKUnifiedIdeographsExtensionG -> BlockDefinition (0x30000, 0x3134f) "CJK Unified Ideographs Extension G" - CJKUnifiedIdeographsExtensionH -> BlockDefinition (0x31350, 0x323af) "CJK Unified Ideographs Extension H" - Tags -> BlockDefinition (0xe0000, 0xe007f) "Tags" - VariationSelectorsSupplement -> BlockDefinition (0xe0100, 0xe01ef) "Variation Selectors Supplement" - SupplementaryPrivateUseAreaA -> BlockDefinition (0xf0000, 0xfffff) "Supplementary Private Use Area-A" - SupplementaryPrivateUseAreaB -> BlockDefinition (0x100000, 0x10ffff) "Supplementary Private Use Area-B" +blockDefinition :: Int# -> (# Int#, Int#, Addr# #) +blockDefinition = \case + 0# -> (# 0x0000#, 0x007f#, "Basic Latin\0"# #) + 1# -> (# 0x0080#, 0x00ff#, "Latin-1 Supplement\0"# #) + 2# -> (# 0x0100#, 0x017f#, "Latin Extended-A\0"# #) + 3# -> (# 0x0180#, 0x024f#, "Latin Extended-B\0"# #) + 4# -> (# 0x0250#, 0x02af#, "IPA Extensions\0"# #) + 5# -> (# 0x02b0#, 0x02ff#, "Spacing Modifier Letters\0"# #) + 6# -> (# 0x0300#, 0x036f#, "Combining Diacritical Marks\0"# #) + 7# -> (# 0x0370#, 0x03ff#, "Greek and Coptic\0"# #) + 8# -> (# 0x0400#, 0x04ff#, "Cyrillic\0"# #) + 9# -> (# 0x0500#, 0x052f#, "Cyrillic Supplement\0"# #) + 10# -> (# 0x0530#, 0x058f#, "Armenian\0"# #) + 11# -> (# 0x0590#, 0x05ff#, "Hebrew\0"# #) + 12# -> (# 0x0600#, 0x06ff#, "Arabic\0"# #) + 13# -> (# 0x0700#, 0x074f#, "Syriac\0"# #) + 14# -> (# 0x0750#, 0x077f#, "Arabic Supplement\0"# #) + 15# -> (# 0x0780#, 0x07bf#, "Thaana\0"# #) + 16# -> (# 0x07c0#, 0x07ff#, "NKo\0"# #) + 17# -> (# 0x0800#, 0x083f#, "Samaritan\0"# #) + 18# -> (# 0x0840#, 0x085f#, "Mandaic\0"# #) + 19# -> (# 0x0860#, 0x086f#, "Syriac Supplement\0"# #) + 20# -> (# 0x0870#, 0x089f#, "Arabic Extended-B\0"# #) + 21# -> (# 0x08a0#, 0x08ff#, "Arabic Extended-A\0"# #) + 22# -> (# 0x0900#, 0x097f#, "Devanagari\0"# #) + 23# -> (# 0x0980#, 0x09ff#, "Bengali\0"# #) + 24# -> (# 0x0a00#, 0x0a7f#, "Gurmukhi\0"# #) + 25# -> (# 0x0a80#, 0x0aff#, "Gujarati\0"# #) + 26# -> (# 0x0b00#, 0x0b7f#, "Oriya\0"# #) + 27# -> (# 0x0b80#, 0x0bff#, "Tamil\0"# #) + 28# -> (# 0x0c00#, 0x0c7f#, "Telugu\0"# #) + 29# -> (# 0x0c80#, 0x0cff#, "Kannada\0"# #) + 30# -> (# 0x0d00#, 0x0d7f#, "Malayalam\0"# #) + 31# -> (# 0x0d80#, 0x0dff#, "Sinhala\0"# #) + 32# -> (# 0x0e00#, 0x0e7f#, "Thai\0"# #) + 33# -> (# 0x0e80#, 0x0eff#, "Lao\0"# #) + 34# -> (# 0x0f00#, 0x0fff#, "Tibetan\0"# #) + 35# -> (# 0x1000#, 0x109f#, "Myanmar\0"# #) + 36# -> (# 0x10a0#, 0x10ff#, "Georgian\0"# #) + 37# -> (# 0x1100#, 0x11ff#, "Hangul Jamo\0"# #) + 38# -> (# 0x1200#, 0x137f#, "Ethiopic\0"# #) + 39# -> (# 0x1380#, 0x139f#, "Ethiopic Supplement\0"# #) + 40# -> (# 0x13a0#, 0x13ff#, "Cherokee\0"# #) + 41# -> (# 0x1400#, 0x167f#, "Unified Canadian Aboriginal Syllabics\0"# #) + 42# -> (# 0x1680#, 0x169f#, "Ogham\0"# #) + 43# -> (# 0x16a0#, 0x16ff#, "Runic\0"# #) + 44# -> (# 0x1700#, 0x171f#, "Tagalog\0"# #) + 45# -> (# 0x1720#, 0x173f#, "Hanunoo\0"# #) + 46# -> (# 0x1740#, 0x175f#, "Buhid\0"# #) + 47# -> (# 0x1760#, 0x177f#, "Tagbanwa\0"# #) + 48# -> (# 0x1780#, 0x17ff#, "Khmer\0"# #) + 49# -> (# 0x1800#, 0x18af#, "Mongolian\0"# #) + 50# -> (# 0x18b0#, 0x18ff#, "Unified Canadian Aboriginal Syllabics Extended\0"# #) + 51# -> (# 0x1900#, 0x194f#, "Limbu\0"# #) + 52# -> (# 0x1950#, 0x197f#, "Tai Le\0"# #) + 53# -> (# 0x1980#, 0x19df#, "New Tai Lue\0"# #) + 54# -> (# 0x19e0#, 0x19ff#, "Khmer Symbols\0"# #) + 55# -> (# 0x1a00#, 0x1a1f#, "Buginese\0"# #) + 56# -> (# 0x1a20#, 0x1aaf#, "Tai Tham\0"# #) + 57# -> (# 0x1ab0#, 0x1aff#, "Combining Diacritical Marks Extended\0"# #) + 58# -> (# 0x1b00#, 0x1b7f#, "Balinese\0"# #) + 59# -> (# 0x1b80#, 0x1bbf#, "Sundanese\0"# #) + 60# -> (# 0x1bc0#, 0x1bff#, "Batak\0"# #) + 61# -> (# 0x1c00#, 0x1c4f#, "Lepcha\0"# #) + 62# -> (# 0x1c50#, 0x1c7f#, "Ol Chiki\0"# #) + 63# -> (# 0x1c80#, 0x1c8f#, "Cyrillic Extended-C\0"# #) + 64# -> (# 0x1c90#, 0x1cbf#, "Georgian Extended\0"# #) + 65# -> (# 0x1cc0#, 0x1ccf#, "Sundanese Supplement\0"# #) + 66# -> (# 0x1cd0#, 0x1cff#, "Vedic Extensions\0"# #) + 67# -> (# 0x1d00#, 0x1d7f#, "Phonetic Extensions\0"# #) + 68# -> (# 0x1d80#, 0x1dbf#, "Phonetic Extensions Supplement\0"# #) + 69# -> (# 0x1dc0#, 0x1dff#, "Combining Diacritical Marks Supplement\0"# #) + 70# -> (# 0x1e00#, 0x1eff#, "Latin Extended Additional\0"# #) + 71# -> (# 0x1f00#, 0x1fff#, "Greek Extended\0"# #) + 72# -> (# 0x2000#, 0x206f#, "General Punctuation\0"# #) + 73# -> (# 0x2070#, 0x209f#, "Superscripts and Subscripts\0"# #) + 74# -> (# 0x20a0#, 0x20cf#, "Currency Symbols\0"# #) + 75# -> (# 0x20d0#, 0x20ff#, "Combining Diacritical Marks for Symbols\0"# #) + 76# -> (# 0x2100#, 0x214f#, "Letterlike Symbols\0"# #) + 77# -> (# 0x2150#, 0x218f#, "Number Forms\0"# #) + 78# -> (# 0x2190#, 0x21ff#, "Arrows\0"# #) + 79# -> (# 0x2200#, 0x22ff#, "Mathematical Operators\0"# #) + 80# -> (# 0x2300#, 0x23ff#, "Miscellaneous Technical\0"# #) + 81# -> (# 0x2400#, 0x243f#, "Control Pictures\0"# #) + 82# -> (# 0x2440#, 0x245f#, "Optical Character Recognition\0"# #) + 83# -> (# 0x2460#, 0x24ff#, "Enclosed Alphanumerics\0"# #) + 84# -> (# 0x2500#, 0x257f#, "Box Drawing\0"# #) + 85# -> (# 0x2580#, 0x259f#, "Block Elements\0"# #) + 86# -> (# 0x25a0#, 0x25ff#, "Geometric Shapes\0"# #) + 87# -> (# 0x2600#, 0x26ff#, "Miscellaneous Symbols\0"# #) + 88# -> (# 0x2700#, 0x27bf#, "Dingbats\0"# #) + 89# -> (# 0x27c0#, 0x27ef#, "Miscellaneous Mathematical Symbols-A\0"# #) + 90# -> (# 0x27f0#, 0x27ff#, "Supplemental Arrows-A\0"# #) + 91# -> (# 0x2800#, 0x28ff#, "Braille Patterns\0"# #) + 92# -> (# 0x2900#, 0x297f#, "Supplemental Arrows-B\0"# #) + 93# -> (# 0x2980#, 0x29ff#, "Miscellaneous Mathematical Symbols-B\0"# #) + 94# -> (# 0x2a00#, 0x2aff#, "Supplemental Mathematical Operators\0"# #) + 95# -> (# 0x2b00#, 0x2bff#, "Miscellaneous Symbols and Arrows\0"# #) + 96# -> (# 0x2c00#, 0x2c5f#, "Glagolitic\0"# #) + 97# -> (# 0x2c60#, 0x2c7f#, "Latin Extended-C\0"# #) + 98# -> (# 0x2c80#, 0x2cff#, "Coptic\0"# #) + 99# -> (# 0x2d00#, 0x2d2f#, "Georgian Supplement\0"# #) + 100# -> (# 0x2d30#, 0x2d7f#, "Tifinagh\0"# #) + 101# -> (# 0x2d80#, 0x2ddf#, "Ethiopic Extended\0"# #) + 102# -> (# 0x2de0#, 0x2dff#, "Cyrillic Extended-A\0"# #) + 103# -> (# 0x2e00#, 0x2e7f#, "Supplemental Punctuation\0"# #) + 104# -> (# 0x2e80#, 0x2eff#, "CJK Radicals Supplement\0"# #) + 105# -> (# 0x2f00#, 0x2fdf#, "Kangxi Radicals\0"# #) + 106# -> (# 0x2ff0#, 0x2fff#, "Ideographic Description Characters\0"# #) + 107# -> (# 0x3000#, 0x303f#, "CJK Symbols and Punctuation\0"# #) + 108# -> (# 0x3040#, 0x309f#, "Hiragana\0"# #) + 109# -> (# 0x30a0#, 0x30ff#, "Katakana\0"# #) + 110# -> (# 0x3100#, 0x312f#, "Bopomofo\0"# #) + 111# -> (# 0x3130#, 0x318f#, "Hangul Compatibility Jamo\0"# #) + 112# -> (# 0x3190#, 0x319f#, "Kanbun\0"# #) + 113# -> (# 0x31a0#, 0x31bf#, "Bopomofo Extended\0"# #) + 114# -> (# 0x31c0#, 0x31ef#, "CJK Strokes\0"# #) + 115# -> (# 0x31f0#, 0x31ff#, "Katakana Phonetic Extensions\0"# #) + 116# -> (# 0x3200#, 0x32ff#, "Enclosed CJK Letters and Months\0"# #) + 117# -> (# 0x3300#, 0x33ff#, "CJK Compatibility\0"# #) + 118# -> (# 0x3400#, 0x4dbf#, "CJK Unified Ideographs Extension A\0"# #) + 119# -> (# 0x4dc0#, 0x4dff#, "Yijing Hexagram Symbols\0"# #) + 120# -> (# 0x4e00#, 0x9fff#, "CJK Unified Ideographs\0"# #) + 121# -> (# 0xa000#, 0xa48f#, "Yi Syllables\0"# #) + 122# -> (# 0xa490#, 0xa4cf#, "Yi Radicals\0"# #) + 123# -> (# 0xa4d0#, 0xa4ff#, "Lisu\0"# #) + 124# -> (# 0xa500#, 0xa63f#, "Vai\0"# #) + 125# -> (# 0xa640#, 0xa69f#, "Cyrillic Extended-B\0"# #) + 126# -> (# 0xa6a0#, 0xa6ff#, "Bamum\0"# #) + 127# -> (# 0xa700#, 0xa71f#, "Modifier Tone Letters\0"# #) + 128# -> (# 0xa720#, 0xa7ff#, "Latin Extended-D\0"# #) + 129# -> (# 0xa800#, 0xa82f#, "Syloti Nagri\0"# #) + 130# -> (# 0xa830#, 0xa83f#, "Common Indic Number Forms\0"# #) + 131# -> (# 0xa840#, 0xa87f#, "Phags-pa\0"# #) + 132# -> (# 0xa880#, 0xa8df#, "Saurashtra\0"# #) + 133# -> (# 0xa8e0#, 0xa8ff#, "Devanagari Extended\0"# #) + 134# -> (# 0xa900#, 0xa92f#, "Kayah Li\0"# #) + 135# -> (# 0xa930#, 0xa95f#, "Rejang\0"# #) + 136# -> (# 0xa960#, 0xa97f#, "Hangul Jamo Extended-A\0"# #) + 137# -> (# 0xa980#, 0xa9df#, "Javanese\0"# #) + 138# -> (# 0xa9e0#, 0xa9ff#, "Myanmar Extended-B\0"# #) + 139# -> (# 0xaa00#, 0xaa5f#, "Cham\0"# #) + 140# -> (# 0xaa60#, 0xaa7f#, "Myanmar Extended-A\0"# #) + 141# -> (# 0xaa80#, 0xaadf#, "Tai Viet\0"# #) + 142# -> (# 0xaae0#, 0xaaff#, "Meetei Mayek Extensions\0"# #) + 143# -> (# 0xab00#, 0xab2f#, "Ethiopic Extended-A\0"# #) + 144# -> (# 0xab30#, 0xab6f#, "Latin Extended-E\0"# #) + 145# -> (# 0xab70#, 0xabbf#, "Cherokee Supplement\0"# #) + 146# -> (# 0xabc0#, 0xabff#, "Meetei Mayek\0"# #) + 147# -> (# 0xac00#, 0xd7af#, "Hangul Syllables\0"# #) + 148# -> (# 0xd7b0#, 0xd7ff#, "Hangul Jamo Extended-B\0"# #) + 149# -> (# 0xd800#, 0xdb7f#, "High Surrogates\0"# #) + 150# -> (# 0xdb80#, 0xdbff#, "High Private Use Surrogates\0"# #) + 151# -> (# 0xdc00#, 0xdfff#, "Low Surrogates\0"# #) + 152# -> (# 0xe000#, 0xf8ff#, "Private Use Area\0"# #) + 153# -> (# 0xf900#, 0xfaff#, "CJK Compatibility Ideographs\0"# #) + 154# -> (# 0xfb00#, 0xfb4f#, "Alphabetic Presentation Forms\0"# #) + 155# -> (# 0xfb50#, 0xfdff#, "Arabic Presentation Forms-A\0"# #) + 156# -> (# 0xfe00#, 0xfe0f#, "Variation Selectors\0"# #) + 157# -> (# 0xfe10#, 0xfe1f#, "Vertical Forms\0"# #) + 158# -> (# 0xfe20#, 0xfe2f#, "Combining Half Marks\0"# #) + 159# -> (# 0xfe30#, 0xfe4f#, "CJK Compatibility Forms\0"# #) + 160# -> (# 0xfe50#, 0xfe6f#, "Small Form Variants\0"# #) + 161# -> (# 0xfe70#, 0xfeff#, "Arabic Presentation Forms-B\0"# #) + 162# -> (# 0xff00#, 0xffef#, "Halfwidth and Fullwidth Forms\0"# #) + 163# -> (# 0xfff0#, 0xffff#, "Specials\0"# #) + 164# -> (# 0x10000#, 0x1007f#, "Linear B Syllabary\0"# #) + 165# -> (# 0x10080#, 0x100ff#, "Linear B Ideograms\0"# #) + 166# -> (# 0x10100#, 0x1013f#, "Aegean Numbers\0"# #) + 167# -> (# 0x10140#, 0x1018f#, "Ancient Greek Numbers\0"# #) + 168# -> (# 0x10190#, 0x101cf#, "Ancient Symbols\0"# #) + 169# -> (# 0x101d0#, 0x101ff#, "Phaistos Disc\0"# #) + 170# -> (# 0x10280#, 0x1029f#, "Lycian\0"# #) + 171# -> (# 0x102a0#, 0x102df#, "Carian\0"# #) + 172# -> (# 0x102e0#, 0x102ff#, "Coptic Epact Numbers\0"# #) + 173# -> (# 0x10300#, 0x1032f#, "Old Italic\0"# #) + 174# -> (# 0x10330#, 0x1034f#, "Gothic\0"# #) + 175# -> (# 0x10350#, 0x1037f#, "Old Permic\0"# #) + 176# -> (# 0x10380#, 0x1039f#, "Ugaritic\0"# #) + 177# -> (# 0x103a0#, 0x103df#, "Old Persian\0"# #) + 178# -> (# 0x10400#, 0x1044f#, "Deseret\0"# #) + 179# -> (# 0x10450#, 0x1047f#, "Shavian\0"# #) + 180# -> (# 0x10480#, 0x104af#, "Osmanya\0"# #) + 181# -> (# 0x104b0#, 0x104ff#, "Osage\0"# #) + 182# -> (# 0x10500#, 0x1052f#, "Elbasan\0"# #) + 183# -> (# 0x10530#, 0x1056f#, "Caucasian Albanian\0"# #) + 184# -> (# 0x10570#, 0x105bf#, "Vithkuqi\0"# #) + 185# -> (# 0x10600#, 0x1077f#, "Linear A\0"# #) + 186# -> (# 0x10780#, 0x107bf#, "Latin Extended-F\0"# #) + 187# -> (# 0x10800#, 0x1083f#, "Cypriot Syllabary\0"# #) + 188# -> (# 0x10840#, 0x1085f#, "Imperial Aramaic\0"# #) + 189# -> (# 0x10860#, 0x1087f#, "Palmyrene\0"# #) + 190# -> (# 0x10880#, 0x108af#, "Nabataean\0"# #) + 191# -> (# 0x108e0#, 0x108ff#, "Hatran\0"# #) + 192# -> (# 0x10900#, 0x1091f#, "Phoenician\0"# #) + 193# -> (# 0x10920#, 0x1093f#, "Lydian\0"# #) + 194# -> (# 0x10980#, 0x1099f#, "Meroitic Hieroglyphs\0"# #) + 195# -> (# 0x109a0#, 0x109ff#, "Meroitic Cursive\0"# #) + 196# -> (# 0x10a00#, 0x10a5f#, "Kharoshthi\0"# #) + 197# -> (# 0x10a60#, 0x10a7f#, "Old South Arabian\0"# #) + 198# -> (# 0x10a80#, 0x10a9f#, "Old North Arabian\0"# #) + 199# -> (# 0x10ac0#, 0x10aff#, "Manichaean\0"# #) + 200# -> (# 0x10b00#, 0x10b3f#, "Avestan\0"# #) + 201# -> (# 0x10b40#, 0x10b5f#, "Inscriptional Parthian\0"# #) + 202# -> (# 0x10b60#, 0x10b7f#, "Inscriptional Pahlavi\0"# #) + 203# -> (# 0x10b80#, 0x10baf#, "Psalter Pahlavi\0"# #) + 204# -> (# 0x10c00#, 0x10c4f#, "Old Turkic\0"# #) + 205# -> (# 0x10c80#, 0x10cff#, "Old Hungarian\0"# #) + 206# -> (# 0x10d00#, 0x10d3f#, "Hanifi Rohingya\0"# #) + 207# -> (# 0x10e60#, 0x10e7f#, "Rumi Numeral Symbols\0"# #) + 208# -> (# 0x10e80#, 0x10ebf#, "Yezidi\0"# #) + 209# -> (# 0x10ec0#, 0x10eff#, "Arabic Extended-C\0"# #) + 210# -> (# 0x10f00#, 0x10f2f#, "Old Sogdian\0"# #) + 211# -> (# 0x10f30#, 0x10f6f#, "Sogdian\0"# #) + 212# -> (# 0x10f70#, 0x10faf#, "Old Uyghur\0"# #) + 213# -> (# 0x10fb0#, 0x10fdf#, "Chorasmian\0"# #) + 214# -> (# 0x10fe0#, 0x10fff#, "Elymaic\0"# #) + 215# -> (# 0x11000#, 0x1107f#, "Brahmi\0"# #) + 216# -> (# 0x11080#, 0x110cf#, "Kaithi\0"# #) + 217# -> (# 0x110d0#, 0x110ff#, "Sora Sompeng\0"# #) + 218# -> (# 0x11100#, 0x1114f#, "Chakma\0"# #) + 219# -> (# 0x11150#, 0x1117f#, "Mahajani\0"# #) + 220# -> (# 0x11180#, 0x111df#, "Sharada\0"# #) + 221# -> (# 0x111e0#, 0x111ff#, "Sinhala Archaic Numbers\0"# #) + 222# -> (# 0x11200#, 0x1124f#, "Khojki\0"# #) + 223# -> (# 0x11280#, 0x112af#, "Multani\0"# #) + 224# -> (# 0x112b0#, 0x112ff#, "Khudawadi\0"# #) + 225# -> (# 0x11300#, 0x1137f#, "Grantha\0"# #) + 226# -> (# 0x11400#, 0x1147f#, "Newa\0"# #) + 227# -> (# 0x11480#, 0x114df#, "Tirhuta\0"# #) + 228# -> (# 0x11580#, 0x115ff#, "Siddham\0"# #) + 229# -> (# 0x11600#, 0x1165f#, "Modi\0"# #) + 230# -> (# 0x11660#, 0x1167f#, "Mongolian Supplement\0"# #) + 231# -> (# 0x11680#, 0x116cf#, "Takri\0"# #) + 232# -> (# 0x11700#, 0x1174f#, "Ahom\0"# #) + 233# -> (# 0x11800#, 0x1184f#, "Dogra\0"# #) + 234# -> (# 0x118a0#, 0x118ff#, "Warang Citi\0"# #) + 235# -> (# 0x11900#, 0x1195f#, "Dives Akuru\0"# #) + 236# -> (# 0x119a0#, 0x119ff#, "Nandinagari\0"# #) + 237# -> (# 0x11a00#, 0x11a4f#, "Zanabazar Square\0"# #) + 238# -> (# 0x11a50#, 0x11aaf#, "Soyombo\0"# #) + 239# -> (# 0x11ab0#, 0x11abf#, "Unified Canadian Aboriginal Syllabics Extended-A\0"# #) + 240# -> (# 0x11ac0#, 0x11aff#, "Pau Cin Hau\0"# #) + 241# -> (# 0x11b00#, 0x11b5f#, "Devanagari Extended-A\0"# #) + 242# -> (# 0x11c00#, 0x11c6f#, "Bhaiksuki\0"# #) + 243# -> (# 0x11c70#, 0x11cbf#, "Marchen\0"# #) + 244# -> (# 0x11d00#, 0x11d5f#, "Masaram Gondi\0"# #) + 245# -> (# 0x11d60#, 0x11daf#, "Gunjala Gondi\0"# #) + 246# -> (# 0x11ee0#, 0x11eff#, "Makasar\0"# #) + 247# -> (# 0x11f00#, 0x11f5f#, "Kawi\0"# #) + 248# -> (# 0x11fb0#, 0x11fbf#, "Lisu Supplement\0"# #) + 249# -> (# 0x11fc0#, 0x11fff#, "Tamil Supplement\0"# #) + 250# -> (# 0x12000#, 0x123ff#, "Cuneiform\0"# #) + 251# -> (# 0x12400#, 0x1247f#, "Cuneiform Numbers and Punctuation\0"# #) + 252# -> (# 0x12480#, 0x1254f#, "Early Dynastic Cuneiform\0"# #) + 253# -> (# 0x12f90#, 0x12fff#, "Cypro-Minoan\0"# #) + 254# -> (# 0x13000#, 0x1342f#, "Egyptian Hieroglyphs\0"# #) + 255# -> (# 0x13430#, 0x1345f#, "Egyptian Hieroglyph Format Controls\0"# #) + 256# -> (# 0x14400#, 0x1467f#, "Anatolian Hieroglyphs\0"# #) + 257# -> (# 0x16800#, 0x16a3f#, "Bamum Supplement\0"# #) + 258# -> (# 0x16a40#, 0x16a6f#, "Mro\0"# #) + 259# -> (# 0x16a70#, 0x16acf#, "Tangsa\0"# #) + 260# -> (# 0x16ad0#, 0x16aff#, "Bassa Vah\0"# #) + 261# -> (# 0x16b00#, 0x16b8f#, "Pahawh Hmong\0"# #) + 262# -> (# 0x16e40#, 0x16e9f#, "Medefaidrin\0"# #) + 263# -> (# 0x16f00#, 0x16f9f#, "Miao\0"# #) + 264# -> (# 0x16fe0#, 0x16fff#, "Ideographic Symbols and Punctuation\0"# #) + 265# -> (# 0x17000#, 0x187ff#, "Tangut\0"# #) + 266# -> (# 0x18800#, 0x18aff#, "Tangut Components\0"# #) + 267# -> (# 0x18b00#, 0x18cff#, "Khitan Small Script\0"# #) + 268# -> (# 0x18d00#, 0x18d7f#, "Tangut Supplement\0"# #) + 269# -> (# 0x1aff0#, 0x1afff#, "Kana Extended-B\0"# #) + 270# -> (# 0x1b000#, 0x1b0ff#, "Kana Supplement\0"# #) + 271# -> (# 0x1b100#, 0x1b12f#, "Kana Extended-A\0"# #) + 272# -> (# 0x1b130#, 0x1b16f#, "Small Kana Extension\0"# #) + 273# -> (# 0x1b170#, 0x1b2ff#, "Nushu\0"# #) + 274# -> (# 0x1bc00#, 0x1bc9f#, "Duployan\0"# #) + 275# -> (# 0x1bca0#, 0x1bcaf#, "Shorthand Format Controls\0"# #) + 276# -> (# 0x1cf00#, 0x1cfcf#, "Znamenny Musical Notation\0"# #) + 277# -> (# 0x1d000#, 0x1d0ff#, "Byzantine Musical Symbols\0"# #) + 278# -> (# 0x1d100#, 0x1d1ff#, "Musical Symbols\0"# #) + 279# -> (# 0x1d200#, 0x1d24f#, "Ancient Greek Musical Notation\0"# #) + 280# -> (# 0x1d2c0#, 0x1d2df#, "Kaktovik Numerals\0"# #) + 281# -> (# 0x1d2e0#, 0x1d2ff#, "Mayan Numerals\0"# #) + 282# -> (# 0x1d300#, 0x1d35f#, "Tai Xuan Jing Symbols\0"# #) + 283# -> (# 0x1d360#, 0x1d37f#, "Counting Rod Numerals\0"# #) + 284# -> (# 0x1d400#, 0x1d7ff#, "Mathematical Alphanumeric Symbols\0"# #) + 285# -> (# 0x1d800#, 0x1daaf#, "Sutton SignWriting\0"# #) + 286# -> (# 0x1df00#, 0x1dfff#, "Latin Extended-G\0"# #) + 287# -> (# 0x1e000#, 0x1e02f#, "Glagolitic Supplement\0"# #) + 288# -> (# 0x1e030#, 0x1e08f#, "Cyrillic Extended-D\0"# #) + 289# -> (# 0x1e100#, 0x1e14f#, "Nyiakeng Puachue Hmong\0"# #) + 290# -> (# 0x1e290#, 0x1e2bf#, "Toto\0"# #) + 291# -> (# 0x1e2c0#, 0x1e2ff#, "Wancho\0"# #) + 292# -> (# 0x1e4d0#, 0x1e4ff#, "Nag Mundari\0"# #) + 293# -> (# 0x1e7e0#, 0x1e7ff#, "Ethiopic Extended-B\0"# #) + 294# -> (# 0x1e800#, 0x1e8df#, "Mende Kikakui\0"# #) + 295# -> (# 0x1e900#, 0x1e95f#, "Adlam\0"# #) + 296# -> (# 0x1ec70#, 0x1ecbf#, "Indic Siyaq Numbers\0"# #) + 297# -> (# 0x1ed00#, 0x1ed4f#, "Ottoman Siyaq Numbers\0"# #) + 298# -> (# 0x1ee00#, 0x1eeff#, "Arabic Mathematical Alphabetic Symbols\0"# #) + 299# -> (# 0x1f000#, 0x1f02f#, "Mahjong Tiles\0"# #) + 300# -> (# 0x1f030#, 0x1f09f#, "Domino Tiles\0"# #) + 301# -> (# 0x1f0a0#, 0x1f0ff#, "Playing Cards\0"# #) + 302# -> (# 0x1f100#, 0x1f1ff#, "Enclosed Alphanumeric Supplement\0"# #) + 303# -> (# 0x1f200#, 0x1f2ff#, "Enclosed Ideographic Supplement\0"# #) + 304# -> (# 0x1f300#, 0x1f5ff#, "Miscellaneous Symbols and Pictographs\0"# #) + 305# -> (# 0x1f600#, 0x1f64f#, "Emoticons\0"# #) + 306# -> (# 0x1f650#, 0x1f67f#, "Ornamental Dingbats\0"# #) + 307# -> (# 0x1f680#, 0x1f6ff#, "Transport and Map Symbols\0"# #) + 308# -> (# 0x1f700#, 0x1f77f#, "Alchemical Symbols\0"# #) + 309# -> (# 0x1f780#, 0x1f7ff#, "Geometric Shapes Extended\0"# #) + 310# -> (# 0x1f800#, 0x1f8ff#, "Supplemental Arrows-C\0"# #) + 311# -> (# 0x1f900#, 0x1f9ff#, "Supplemental Symbols and Pictographs\0"# #) + 312# -> (# 0x1fa00#, 0x1fa6f#, "Chess Symbols\0"# #) + 313# -> (# 0x1fa70#, 0x1faff#, "Symbols and Pictographs Extended-A\0"# #) + 314# -> (# 0x1fb00#, 0x1fbff#, "Symbols for Legacy Computing\0"# #) + 315# -> (# 0x20000#, 0x2a6df#, "CJK Unified Ideographs Extension B\0"# #) + 316# -> (# 0x2a700#, 0x2b73f#, "CJK Unified Ideographs Extension C\0"# #) + 317# -> (# 0x2b740#, 0x2b81f#, "CJK Unified Ideographs Extension D\0"# #) + 318# -> (# 0x2b820#, 0x2ceaf#, "CJK Unified Ideographs Extension E\0"# #) + 319# -> (# 0x2ceb0#, 0x2ebef#, "CJK Unified Ideographs Extension F\0"# #) + 320# -> (# 0x2f800#, 0x2fa1f#, "CJK Compatibility Ideographs Supplement\0"# #) + 321# -> (# 0x30000#, 0x3134f#, "CJK Unified Ideographs Extension G\0"# #) + 322# -> (# 0x31350#, 0x323af#, "CJK Unified Ideographs Extension H\0"# #) + 323# -> (# 0xe0000#, 0xe007f#, "Tags\0"# #) + 324# -> (# 0xe0100#, 0xe01ef#, "Variation Selectors Supplement\0"# #) + 325# -> (# 0xf0000#, 0xfffff#, "Supplementary Private Use Area-A\0"# #) + _ -> (# 0x100000#, 0x10ffff#, "Supplementary Private Use Area-B\0"# #) --- | Character block, if defined. +-- | Character block, if defined, else -1. -- -- @since 0.3.1 -block :: Char -> Maybe Int -block (C# c#) = getBlock 0# 326# +block :: Char# -> Int# +block c# = getBlock 0# 326# where -- [NOTE] Encoding -- A range is encoded as two LE Word32: @@ -709,7 +710,7 @@ block (C# c#) = getBlock 0# 326# -- Binary search getBlock l# u# = if isTrue# (l# ># u#) - then Nothing + then -1# else let k# = l# +# uncheckedIShiftRL# (u# -# l#) 1# j# = k# `uncheckedIShiftL#` 1# @@ -725,7 +726,7 @@ block (C# c#) = getBlock 0# 326# then getBlock l# (k# -# 1#) -- cp in block: get block index else let block# = cpL0# `uncheckedShiftRL#` 21# - in Just (I# (word2Int# block#)) + in word2Int# block# getRawCodePoint# = lookupWord32# ranges# diff --git a/unicode-data/test/Unicode/CharSpec.hs b/unicode-data/test/Unicode/CharSpec.hs index 949b477c..028735e6 100644 --- a/unicode-data/test/Unicode/CharSpec.hs +++ b/unicode-data/test/Unicode/CharSpec.hs @@ -72,6 +72,10 @@ spec = do Just _ -> pure () Nothing -> UChar.generalCategory c `shouldBe` UChar.NotAssigned } in traverse_ check [minBound..maxBound] + it "Examples" do + let blockDef = UBlocks.blockDefinition UBlocks.Latin1Supplement + UBlocks.blockRange blockDef `shouldBe` (0x0080, 0x00ff) + UBlocks.blockName blockDef `shouldBe` "Latin-1 Supplement" it "Characters are in the definition of their corresponding block" let { check c = case UBlocks.block c of diff --git a/unicode-data/unicode-data.cabal b/unicode-data/unicode-data.cabal index d9caab88..943896ec 100644 --- a/unicode-data/unicode-data.cabal +++ b/unicode-data/unicode-data.cabal @@ -118,6 +118,9 @@ library ghc-options: -O2 build-depends: base >=4.7 && < 4.21 + if impl(ghc < 9.0) + -- Required for unpackCString# + build-depends: ghc-prim test-suite test import: default-extensions, compile-options