diff --git a/unicode-data/exe/Parser/Text.hs b/unicode-data/exe/Parser/Text.hs index 77a12675..4666de28 100644 --- a/unicode-data/exe/Parser/Text.hs +++ b/unicode-data/exe/Parser/Text.hs @@ -237,6 +237,10 @@ enumMapToAddrLiteral xs cs = foldr go cs xs then fromIntegral w else error $ "Cannot convert to Word8: " <> show a +-- Encode Word32 to [Word8] little endian +word32ToWord8s :: Word32 -> [Word8] +word32ToWord8s n = (\k -> fromIntegral ((n `shiftR` k) .&. 0xff)) <$> [0,8..24] + -- This bit of code is duplicated but this duplication allows us to reduce 2 -- dependencies on the executable. @@ -278,30 +282,30 @@ genBlocksModule genBlocksModule moduleName = done <$> Fold.foldl' step initial where - done (blocks, defs, ranges, _) = unlines + done (blocks, defs, ranges) = let ranges' = reverse ranges in unlines [ apacheLicense 2022 moduleName - , "{-# LANGUAGE MultiWayIf #-}" + , "{-# LANGUAGE CPP, MultiWayIf #-}" , "{-# OPTIONS_HADDOCK hide #-}" , "" , "module " <> moduleName - , "(Block(..), BlockDefinition(..), block, blockDefinition)" + , "(Block(..), BlockDefinition(..), block, blockDefinition, allBlockRanges)" , "where" , "" - , "import Data.Char (ord)" , "import Data.Ix (Ix)" + , "import GHC.Exts" , "" , "-- [TODO] @since" , "-- | Unicode block." , "data Block" - , " = " <> mconcat (intersperse "\n | " (reverse blocks)) - , " deriving (Enum, Bounded, Eq, Ord, Ix, Show)" + , " = " <> mconcat (intersperse "\n | " (reverse blocks)) + , " deriving (Enum, Bounded, Eq, Ord, Ix, Show)" , "" , "-- [TODO] @since" , "-- | Block definition: range and name." , "data BlockDefinition = BlockDefinition" - , " { blockRange :: !(Int, Int) -- ^ Range" - , " , blockName :: !String -- ^ Name" - , " } deriving (Eq, Ord, Show)" + , " { blockRange :: !(Int, Int) -- ^ Range" + , " , blockName :: !String -- ^ Name" + , " } deriving (Eq, Ord, Show)" , "" , "-- [TODO] @since" , "-- | Block definition" @@ -309,24 +313,70 @@ genBlocksModule moduleName = done <$> Fold.foldl' step initial , "blockDefinition b = case b of" , mconcat (reverse defs) , "-- [TODO] @since" + , "-- | All the block ranges, in ascending order." + , "{-# INLINE allBlockRanges #-}" + , "allBlockRanges :: [(Int, Int)]" + , "allBlockRanges =" + , " " <> show ranges' + , "" + , "-- [TODO] @since" , "-- | Character block, if defined." - , "block :: Char -> Maybe Block" - , "block c" - , mconcat (reverse ranges) - , " | otherwise = Nothing" - , " where cp = ord c" + , "block :: Char -> Maybe Int" + , "block (C# c#) = getBlock 0# " <> shows (length ranges - 1) "#" + , " where" + , " -- [NOTE] Encoding" + , " -- A range is encoded as two LE Word32:" + , " -- • First one is the lower bound, where the higher 11 bits are the block" + , " -- index and the lower 21 bits are the codepoint." + , " -- • Second one is the upper bound, which correspond to the codepoint." + , "" + , " cp# = int2Word# (ord# c#)" + , "" + , " -- Binary search" + , " getBlock l# u# = if isTrue# (l# ># u#)" + , " then Nothing" + , " else" + , " let k# = l# +# uncheckedIShiftRL# (u# -# l#) 1#" + , " j# = k# `uncheckedIShiftL#` 1#" + , " cpL0# = getRawCodePoint# j#" + , " cpL# = cpL0# `and#` 0x1fffff## -- Mask for codepoint: [0..0x10fff]" + , " cpU# = getRawCodePoint# (j# +# 1#)" + , " in if isTrue# (cpU# `ltWord#` cp#)" + , " -- cp > upper bound" + , " then getBlock (k# +# 1#) u#" + , " -- check lower bound" + , " else if isTrue# (cp# `ltWord#` cpL#)" + , " -- cp < lower bound" + , " then getBlock l# (k# -# 1#)" + , " -- cp in block: get block index" + , " else let block# = cpL0# `uncheckedShiftRL#` 21#" + , " in Just (I# (word2Int# block#))" + , "" + , " getRawCodePoint# k# =" + , "#ifdef WORDS_BIGENDIAN" + , "#if MIN_VERSION_base(4,16,0)" + , " byteSwap32# (word32ToWord# (indexWord32OffAddr# addr# k#))" + , "#else" + , " byteSwap32# (indexWord32OffAddr# ranges# k#)" + , "#endif" + , "#elif MIN_VERSION_base(4,16,0)" + , " word32ToWord# (indexWord32OffAddr# ranges# k#)" + , "#else" + , " indexWord32OffAddr# ranges# k#" + , "#endif" + , "" + , " -- Encoded ranges" + , " ranges# = \"" <> enumMapToAddrLiteral (mkRanges ranges') "\"#" ] - initial :: ([String], [String], [String], Int) - initial = (mempty, mempty, mempty, 0) + initial :: ([String], [String], [(Int, Int)]) + initial = (mempty, mempty, mempty) - step (blocks, defs, ranges, expected) (blockName, blockRange) = + step (blocks, defs, ranges) (blockName, blockRange) = let blockID = mkHaskellConstructor blockName - precedByNoBlock = expected < fst blockRange in ( mkBlockConstructor blockID blockName blockRange : blocks , mkBlockDef blockID blockName blockRange : defs - , mkBlockRange blockID blockRange precedByNoBlock : ranges - , succ (snd blockRange) ) + , blockRange : ranges ) mkBlockConstructor blockID blockName (l, u) = mconcat [ blockID @@ -340,7 +390,7 @@ genBlocksModule moduleName = done <$> Fold.foldl' step initial ] mkBlockDef blockID blockName (l, u) = mconcat - [ " " + [ " " , blockID , " -> BlockDefinition (0x" , showPaddedHex l @@ -351,22 +401,17 @@ genBlocksModule moduleName = done <$> Fold.foldl' step initial , "\n" ] - mkBlockRange :: String -> (Int, Int) -> Bool -> String - mkBlockRange blockID blockRange precedByNoBlock = - let { - noBlock = mconcat - [ " | cp <= 0x" - , showHex (fst blockRange - 1) " = Nothing\n" - ]; - block = mconcat - [ " | cp <= 0x" - , showHex (snd blockRange) " = Just " - , blockID - , "\n" - ] - } in if precedByNoBlock - then noBlock <> block - else block + -- [NOTE] Encoding: a range is encoded as two LE Word32: + -- • First one is the lower bound, where the higher 11 bits are the block + -- index and the lower 21 bits are the codepoint. + -- • Second one is upper bound, which correspond to the codepoint. + mkRanges :: [(Int, Int)] -> [Word8] + mkRanges = foldMap (uncurry mkBlockRange) . zip [0..] + mkBlockRange :: Word32 -> (Int, Int) -> [Word8] + mkBlockRange idx (l, u) = encodeBound idx l <> encodeBound 0 u + + encodeBound :: Word32 -> Int -> [Word8] + encodeBound idx n = word32ToWord8s ((idx `shiftL` 21) .|. fromIntegral n) genScriptsModule :: Monad m @@ -493,11 +538,8 @@ genScriptsModule moduleName = Left c -> encodeBytes (fromIntegral (ord c)) Right (l, u) -> encodeBytes (setBit (fromIntegral (ord l)) 31) <> encodeBytes (setBit (fromIntegral (ord u)) 31) - encodeBytes = foldr addByte "" . toWord8s + encodeBytes = foldr addByte "" . word32ToWord8s addByte n acc = '\\' : shows n acc - -- Encode Word32 to [Word8] little endian - toWord8s :: Word32 -> [Word8] - toWord8s n = (\k -> fromIntegral ((n `shiftR` k) .&. 0xff)) <$> [0,8..24] mkCharScripts :: [String] -> [ScriptLine] -> String mkCharScripts scripts scriptsRanges = diff --git a/unicode-data/lib/Unicode/Char/General/Blocks.hs b/unicode-data/lib/Unicode/Char/General/Blocks.hs index 7093296f..265e8b33 100644 --- a/unicode-data/lib/Unicode/Char/General/Blocks.hs +++ b/unicode-data/lib/Unicode/Char/General/Blocks.hs @@ -12,7 +12,7 @@ module Unicode.Char.General.Blocks ( B.Block(..) , B.BlockDefinition(..) - , B.block + , block , B.blockDefinition , inBlock , allBlockRanges @@ -26,6 +26,12 @@ import Data.Char (chr, ord) import Data.Ix (inRange) import qualified Unicode.Internal.Char.Blocks as B +-- [TODO] @since +-- | Character block, if defined. +{-# INLINE block #-} +block :: Char -> Maybe B.Block +block = fmap toEnum . B.block + -- [TODO] @since -- | Check if a character is in a block. {-# INLINE inBlock #-} @@ -36,11 +42,11 @@ inBlock b = inRange (B.blockRange (B.blockDefinition b)) . ord -- | All the block ranges, in ascending order. {-# INLINE allBlockRanges #-} allBlockRanges :: [(Int, Int)] -allBlockRanges = B.blockRange . B.blockDefinition <$> [minBound..maxBound] +allBlockRanges = B.allBlockRanges -- [TODO] @since -- | Variant of 'allBlockRanges', with ranges expressed as 'Char's instead of -- 'Int's. {-# INLINE allBlockRanges' #-} allBlockRanges' :: [(Char, Char)] -allBlockRanges' = (chr *** chr) <$> allBlockRanges +allBlockRanges' = (chr *** chr) <$> B.allBlockRanges diff --git a/unicode-data/lib/Unicode/Internal/Char/Blocks.hs b/unicode-data/lib/Unicode/Internal/Char/Blocks.hs index e3f4564f..c8d62335 100644 --- a/unicode-data/lib/Unicode/Internal/Char/Blocks.hs +++ b/unicode-data/lib/Unicode/Internal/Char/Blocks.hs @@ -6,20 +6,20 @@ -- Maintainer : streamly@composewell.com -- Stability : experimental -{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE CPP, MultiWayIf #-} {-# OPTIONS_HADDOCK hide #-} module Unicode.Internal.Char.Blocks -(Block(..), BlockDefinition(..), block, blockDefinition) +(Block(..), BlockDefinition(..), block, blockDefinition, allBlockRanges) where -import Data.Char (ord) import Data.Ix (Ix) +import GHC.Exts -- [TODO] @since -- | Unicode block. data Block - = Basic_Latin -- ^ @U+0000..U+007F@: Basic Latin. + = Basic_Latin -- ^ @U+0000..U+007F@: Basic Latin. | Latin_1_Supplement -- ^ @U+0080..U+00FF@: Latin-1 Supplement. | Latin_Extended_A -- ^ @U+0100..U+017F@: Latin Extended-A. | Latin_Extended_B -- ^ @U+0180..U+024F@: Latin Extended-B. @@ -339,715 +339,392 @@ data Block | Variation_Selectors_Supplement -- ^ @U+E0100..U+E01EF@: Variation Selectors Supplement. | Supplementary_Private_Use_Area_A -- ^ @U+F0000..U+FFFFF@: Supplementary Private Use Area-A. | Supplementary_Private_Use_Area_B -- ^ @U+100000..U+10FFFF@: Supplementary Private Use Area-B. - deriving (Enum, Bounded, Eq, Ord, Ix, Show) + deriving (Enum, Bounded, Eq, Ord, Ix, Show) -- [TODO] @since -- | Block definition: range and name. data BlockDefinition = BlockDefinition - { blockRange :: !(Int, Int) -- ^ Range - , blockName :: !String -- ^ Name - } deriving (Eq, Ord, Show) + { blockRange :: !(Int, Int) -- ^ Range + , blockName :: !String -- ^ Name + } deriving (Eq, Ord, Show) -- [TODO] @since -- | Block definition blockDefinition :: Block -> BlockDefinition blockDefinition b = case b of - Basic_Latin -> BlockDefinition (0x0000, 0x007f) "Basic Latin" - Latin_1_Supplement -> BlockDefinition (0x0080, 0x00ff) "Latin-1 Supplement" - Latin_Extended_A -> BlockDefinition (0x0100, 0x017f) "Latin Extended-A" - Latin_Extended_B -> BlockDefinition (0x0180, 0x024f) "Latin Extended-B" - IPA_Extensions -> BlockDefinition (0x0250, 0x02af) "IPA Extensions" - Spacing_Modifier_Letters -> BlockDefinition (0x02b0, 0x02ff) "Spacing Modifier Letters" - Combining_Diacritical_Marks -> BlockDefinition (0x0300, 0x036f) "Combining Diacritical Marks" - Greek_and_Coptic -> BlockDefinition (0x0370, 0x03ff) "Greek and Coptic" - Cyrillic -> BlockDefinition (0x0400, 0x04ff) "Cyrillic" - Cyrillic_Supplement -> BlockDefinition (0x0500, 0x052f) "Cyrillic Supplement" - Armenian -> BlockDefinition (0x0530, 0x058f) "Armenian" - Hebrew -> BlockDefinition (0x0590, 0x05ff) "Hebrew" - Arabic -> BlockDefinition (0x0600, 0x06ff) "Arabic" - Syriac -> BlockDefinition (0x0700, 0x074f) "Syriac" - Arabic_Supplement -> BlockDefinition (0x0750, 0x077f) "Arabic Supplement" - Thaana -> BlockDefinition (0x0780, 0x07bf) "Thaana" - NKo -> BlockDefinition (0x07c0, 0x07ff) "NKo" - Samaritan -> BlockDefinition (0x0800, 0x083f) "Samaritan" - Mandaic -> BlockDefinition (0x0840, 0x085f) "Mandaic" - Syriac_Supplement -> BlockDefinition (0x0860, 0x086f) "Syriac Supplement" - Arabic_Extended_B -> BlockDefinition (0x0870, 0x089f) "Arabic Extended-B" - Arabic_Extended_A -> BlockDefinition (0x08a0, 0x08ff) "Arabic Extended-A" - Devanagari -> BlockDefinition (0x0900, 0x097f) "Devanagari" - Bengali -> BlockDefinition (0x0980, 0x09ff) "Bengali" - Gurmukhi -> BlockDefinition (0x0a00, 0x0a7f) "Gurmukhi" - Gujarati -> BlockDefinition (0x0a80, 0x0aff) "Gujarati" - Oriya -> BlockDefinition (0x0b00, 0x0b7f) "Oriya" - Tamil -> BlockDefinition (0x0b80, 0x0bff) "Tamil" - Telugu -> BlockDefinition (0x0c00, 0x0c7f) "Telugu" - Kannada -> BlockDefinition (0x0c80, 0x0cff) "Kannada" - Malayalam -> BlockDefinition (0x0d00, 0x0d7f) "Malayalam" - Sinhala -> BlockDefinition (0x0d80, 0x0dff) "Sinhala" - Thai -> BlockDefinition (0x0e00, 0x0e7f) "Thai" - Lao -> BlockDefinition (0x0e80, 0x0eff) "Lao" - Tibetan -> BlockDefinition (0x0f00, 0x0fff) "Tibetan" - Myanmar -> BlockDefinition (0x1000, 0x109f) "Myanmar" - Georgian -> BlockDefinition (0x10a0, 0x10ff) "Georgian" - Hangul_Jamo -> BlockDefinition (0x1100, 0x11ff) "Hangul Jamo" - Ethiopic -> BlockDefinition (0x1200, 0x137f) "Ethiopic" - Ethiopic_Supplement -> BlockDefinition (0x1380, 0x139f) "Ethiopic Supplement" - Cherokee -> BlockDefinition (0x13a0, 0x13ff) "Cherokee" - Unified_Canadian_Aboriginal_Syllabics -> BlockDefinition (0x1400, 0x167f) "Unified Canadian Aboriginal Syllabics" - Ogham -> BlockDefinition (0x1680, 0x169f) "Ogham" - Runic -> BlockDefinition (0x16a0, 0x16ff) "Runic" - Tagalog -> BlockDefinition (0x1700, 0x171f) "Tagalog" - Hanunoo -> BlockDefinition (0x1720, 0x173f) "Hanunoo" - Buhid -> BlockDefinition (0x1740, 0x175f) "Buhid" - Tagbanwa -> BlockDefinition (0x1760, 0x177f) "Tagbanwa" - Khmer -> BlockDefinition (0x1780, 0x17ff) "Khmer" - Mongolian -> BlockDefinition (0x1800, 0x18af) "Mongolian" - Unified_Canadian_Aboriginal_Syllabics_Extended -> BlockDefinition (0x18b0, 0x18ff) "Unified Canadian Aboriginal Syllabics Extended" - Limbu -> BlockDefinition (0x1900, 0x194f) "Limbu" - Tai_Le -> BlockDefinition (0x1950, 0x197f) "Tai Le" - New_Tai_Lue -> BlockDefinition (0x1980, 0x19df) "New Tai Lue" - Khmer_Symbols -> BlockDefinition (0x19e0, 0x19ff) "Khmer Symbols" - Buginese -> BlockDefinition (0x1a00, 0x1a1f) "Buginese" - Tai_Tham -> BlockDefinition (0x1a20, 0x1aaf) "Tai Tham" - Combining_Diacritical_Marks_Extended -> BlockDefinition (0x1ab0, 0x1aff) "Combining Diacritical Marks Extended" - Balinese -> BlockDefinition (0x1b00, 0x1b7f) "Balinese" - Sundanese -> BlockDefinition (0x1b80, 0x1bbf) "Sundanese" - Batak -> BlockDefinition (0x1bc0, 0x1bff) "Batak" - Lepcha -> BlockDefinition (0x1c00, 0x1c4f) "Lepcha" - Ol_Chiki -> BlockDefinition (0x1c50, 0x1c7f) "Ol Chiki" - Cyrillic_Extended_C -> BlockDefinition (0x1c80, 0x1c8f) "Cyrillic Extended-C" - Georgian_Extended -> BlockDefinition (0x1c90, 0x1cbf) "Georgian Extended" - Sundanese_Supplement -> BlockDefinition (0x1cc0, 0x1ccf) "Sundanese Supplement" - Vedic_Extensions -> BlockDefinition (0x1cd0, 0x1cff) "Vedic Extensions" - Phonetic_Extensions -> BlockDefinition (0x1d00, 0x1d7f) "Phonetic Extensions" - Phonetic_Extensions_Supplement -> BlockDefinition (0x1d80, 0x1dbf) "Phonetic Extensions Supplement" - Combining_Diacritical_Marks_Supplement -> BlockDefinition (0x1dc0, 0x1dff) "Combining Diacritical Marks Supplement" - Latin_Extended_Additional -> BlockDefinition (0x1e00, 0x1eff) "Latin Extended Additional" - Greek_Extended -> BlockDefinition (0x1f00, 0x1fff) "Greek Extended" - General_Punctuation -> BlockDefinition (0x2000, 0x206f) "General Punctuation" - Superscripts_and_Subscripts -> BlockDefinition (0x2070, 0x209f) "Superscripts and Subscripts" - Currency_Symbols -> BlockDefinition (0x20a0, 0x20cf) "Currency Symbols" - Combining_Diacritical_Marks_for_Symbols -> BlockDefinition (0x20d0, 0x20ff) "Combining Diacritical Marks for Symbols" - Letterlike_Symbols -> BlockDefinition (0x2100, 0x214f) "Letterlike Symbols" - Number_Forms -> BlockDefinition (0x2150, 0x218f) "Number Forms" - Arrows -> BlockDefinition (0x2190, 0x21ff) "Arrows" - Mathematical_Operators -> BlockDefinition (0x2200, 0x22ff) "Mathematical Operators" - Miscellaneous_Technical -> BlockDefinition (0x2300, 0x23ff) "Miscellaneous Technical" - Control_Pictures -> BlockDefinition (0x2400, 0x243f) "Control Pictures" - Optical_Character_Recognition -> BlockDefinition (0x2440, 0x245f) "Optical Character Recognition" - Enclosed_Alphanumerics -> BlockDefinition (0x2460, 0x24ff) "Enclosed Alphanumerics" - Box_Drawing -> BlockDefinition (0x2500, 0x257f) "Box Drawing" - Block_Elements -> BlockDefinition (0x2580, 0x259f) "Block Elements" - Geometric_Shapes -> BlockDefinition (0x25a0, 0x25ff) "Geometric Shapes" - Miscellaneous_Symbols -> BlockDefinition (0x2600, 0x26ff) "Miscellaneous Symbols" - Dingbats -> BlockDefinition (0x2700, 0x27bf) "Dingbats" - Miscellaneous_Mathematical_Symbols_A -> BlockDefinition (0x27c0, 0x27ef) "Miscellaneous Mathematical Symbols-A" - Supplemental_Arrows_A -> BlockDefinition (0x27f0, 0x27ff) "Supplemental Arrows-A" - Braille_Patterns -> BlockDefinition (0x2800, 0x28ff) "Braille Patterns" - Supplemental_Arrows_B -> BlockDefinition (0x2900, 0x297f) "Supplemental Arrows-B" - Miscellaneous_Mathematical_Symbols_B -> BlockDefinition (0x2980, 0x29ff) "Miscellaneous Mathematical Symbols-B" - Supplemental_Mathematical_Operators -> BlockDefinition (0x2a00, 0x2aff) "Supplemental Mathematical Operators" - Miscellaneous_Symbols_and_Arrows -> BlockDefinition (0x2b00, 0x2bff) "Miscellaneous Symbols and Arrows" - Glagolitic -> BlockDefinition (0x2c00, 0x2c5f) "Glagolitic" - Latin_Extended_C -> BlockDefinition (0x2c60, 0x2c7f) "Latin Extended-C" - Coptic -> BlockDefinition (0x2c80, 0x2cff) "Coptic" - Georgian_Supplement -> BlockDefinition (0x2d00, 0x2d2f) "Georgian Supplement" - Tifinagh -> BlockDefinition (0x2d30, 0x2d7f) "Tifinagh" - Ethiopic_Extended -> BlockDefinition (0x2d80, 0x2ddf) "Ethiopic Extended" - Cyrillic_Extended_A -> BlockDefinition (0x2de0, 0x2dff) "Cyrillic Extended-A" - Supplemental_Punctuation -> BlockDefinition (0x2e00, 0x2e7f) "Supplemental Punctuation" - CJK_Radicals_Supplement -> BlockDefinition (0x2e80, 0x2eff) "CJK Radicals Supplement" - Kangxi_Radicals -> BlockDefinition (0x2f00, 0x2fdf) "Kangxi Radicals" - Ideographic_Description_Characters -> BlockDefinition (0x2ff0, 0x2fff) "Ideographic Description Characters" - CJK_Symbols_and_Punctuation -> BlockDefinition (0x3000, 0x303f) "CJK Symbols and Punctuation" - Hiragana -> BlockDefinition (0x3040, 0x309f) "Hiragana" - Katakana -> BlockDefinition (0x30a0, 0x30ff) "Katakana" - Bopomofo -> BlockDefinition (0x3100, 0x312f) "Bopomofo" - Hangul_Compatibility_Jamo -> BlockDefinition (0x3130, 0x318f) "Hangul Compatibility Jamo" - Kanbun -> BlockDefinition (0x3190, 0x319f) "Kanbun" - Bopomofo_Extended -> BlockDefinition (0x31a0, 0x31bf) "Bopomofo Extended" - CJK_Strokes -> BlockDefinition (0x31c0, 0x31ef) "CJK Strokes" - Katakana_Phonetic_Extensions -> BlockDefinition (0x31f0, 0x31ff) "Katakana Phonetic Extensions" - Enclosed_CJK_Letters_and_Months -> BlockDefinition (0x3200, 0x32ff) "Enclosed CJK Letters and Months" - CJK_Compatibility -> BlockDefinition (0x3300, 0x33ff) "CJK Compatibility" - CJK_Unified_Ideographs_Extension_A -> BlockDefinition (0x3400, 0x4dbf) "CJK Unified Ideographs Extension A" - Yijing_Hexagram_Symbols -> BlockDefinition (0x4dc0, 0x4dff) "Yijing Hexagram Symbols" - CJK_Unified_Ideographs -> BlockDefinition (0x4e00, 0x9fff) "CJK Unified Ideographs" - Yi_Syllables -> BlockDefinition (0xa000, 0xa48f) "Yi Syllables" - Yi_Radicals -> BlockDefinition (0xa490, 0xa4cf) "Yi Radicals" - Lisu -> BlockDefinition (0xa4d0, 0xa4ff) "Lisu" - Vai -> BlockDefinition (0xa500, 0xa63f) "Vai" - Cyrillic_Extended_B -> BlockDefinition (0xa640, 0xa69f) "Cyrillic Extended-B" - Bamum -> BlockDefinition (0xa6a0, 0xa6ff) "Bamum" - Modifier_Tone_Letters -> BlockDefinition (0xa700, 0xa71f) "Modifier Tone Letters" - Latin_Extended_D -> BlockDefinition (0xa720, 0xa7ff) "Latin Extended-D" - Syloti_Nagri -> BlockDefinition (0xa800, 0xa82f) "Syloti Nagri" - Common_Indic_Number_Forms -> BlockDefinition (0xa830, 0xa83f) "Common Indic Number Forms" - Phags_pa -> BlockDefinition (0xa840, 0xa87f) "Phags-pa" - Saurashtra -> BlockDefinition (0xa880, 0xa8df) "Saurashtra" - Devanagari_Extended -> BlockDefinition (0xa8e0, 0xa8ff) "Devanagari Extended" - Kayah_Li -> BlockDefinition (0xa900, 0xa92f) "Kayah Li" - Rejang -> BlockDefinition (0xa930, 0xa95f) "Rejang" - Hangul_Jamo_Extended_A -> BlockDefinition (0xa960, 0xa97f) "Hangul Jamo Extended-A" - Javanese -> BlockDefinition (0xa980, 0xa9df) "Javanese" - Myanmar_Extended_B -> BlockDefinition (0xa9e0, 0xa9ff) "Myanmar Extended-B" - Cham -> BlockDefinition (0xaa00, 0xaa5f) "Cham" - Myanmar_Extended_A -> BlockDefinition (0xaa60, 0xaa7f) "Myanmar Extended-A" - Tai_Viet -> BlockDefinition (0xaa80, 0xaadf) "Tai Viet" - Meetei_Mayek_Extensions -> BlockDefinition (0xaae0, 0xaaff) "Meetei Mayek Extensions" - Ethiopic_Extended_A -> BlockDefinition (0xab00, 0xab2f) "Ethiopic Extended-A" - Latin_Extended_E -> BlockDefinition (0xab30, 0xab6f) "Latin Extended-E" - Cherokee_Supplement -> BlockDefinition (0xab70, 0xabbf) "Cherokee Supplement" - Meetei_Mayek -> BlockDefinition (0xabc0, 0xabff) "Meetei Mayek" - Hangul_Syllables -> BlockDefinition (0xac00, 0xd7af) "Hangul Syllables" - Hangul_Jamo_Extended_B -> BlockDefinition (0xd7b0, 0xd7ff) "Hangul Jamo Extended-B" - High_Surrogates -> BlockDefinition (0xd800, 0xdb7f) "High Surrogates" - High_Private_Use_Surrogates -> BlockDefinition (0xdb80, 0xdbff) "High Private Use Surrogates" - Low_Surrogates -> BlockDefinition (0xdc00, 0xdfff) "Low Surrogates" - Private_Use_Area -> BlockDefinition (0xe000, 0xf8ff) "Private Use Area" - CJK_Compatibility_Ideographs -> BlockDefinition (0xf900, 0xfaff) "CJK Compatibility Ideographs" - Alphabetic_Presentation_Forms -> BlockDefinition (0xfb00, 0xfb4f) "Alphabetic Presentation Forms" - Arabic_Presentation_Forms_A -> BlockDefinition (0xfb50, 0xfdff) "Arabic Presentation Forms-A" - Variation_Selectors -> BlockDefinition (0xfe00, 0xfe0f) "Variation Selectors" - Vertical_Forms -> BlockDefinition (0xfe10, 0xfe1f) "Vertical Forms" - Combining_Half_Marks -> BlockDefinition (0xfe20, 0xfe2f) "Combining Half Marks" - CJK_Compatibility_Forms -> BlockDefinition (0xfe30, 0xfe4f) "CJK Compatibility Forms" - Small_Form_Variants -> BlockDefinition (0xfe50, 0xfe6f) "Small Form Variants" - Arabic_Presentation_Forms_B -> BlockDefinition (0xfe70, 0xfeff) "Arabic Presentation Forms-B" - Halfwidth_and_Fullwidth_Forms -> BlockDefinition (0xff00, 0xffef) "Halfwidth and Fullwidth Forms" - Specials -> BlockDefinition (0xfff0, 0xffff) "Specials" - Linear_B_Syllabary -> BlockDefinition (0x10000, 0x1007f) "Linear B Syllabary" - Linear_B_Ideograms -> BlockDefinition (0x10080, 0x100ff) "Linear B Ideograms" - Aegean_Numbers -> BlockDefinition (0x10100, 0x1013f) "Aegean Numbers" - Ancient_Greek_Numbers -> BlockDefinition (0x10140, 0x1018f) "Ancient Greek Numbers" - Ancient_Symbols -> BlockDefinition (0x10190, 0x101cf) "Ancient Symbols" - Phaistos_Disc -> BlockDefinition (0x101d0, 0x101ff) "Phaistos Disc" - Lycian -> BlockDefinition (0x10280, 0x1029f) "Lycian" - Carian -> BlockDefinition (0x102a0, 0x102df) "Carian" - Coptic_Epact_Numbers -> BlockDefinition (0x102e0, 0x102ff) "Coptic Epact Numbers" - Old_Italic -> BlockDefinition (0x10300, 0x1032f) "Old Italic" - Gothic -> BlockDefinition (0x10330, 0x1034f) "Gothic" - Old_Permic -> BlockDefinition (0x10350, 0x1037f) "Old Permic" - Ugaritic -> BlockDefinition (0x10380, 0x1039f) "Ugaritic" - Old_Persian -> BlockDefinition (0x103a0, 0x103df) "Old Persian" - Deseret -> BlockDefinition (0x10400, 0x1044f) "Deseret" - Shavian -> BlockDefinition (0x10450, 0x1047f) "Shavian" - Osmanya -> BlockDefinition (0x10480, 0x104af) "Osmanya" - Osage -> BlockDefinition (0x104b0, 0x104ff) "Osage" - Elbasan -> BlockDefinition (0x10500, 0x1052f) "Elbasan" - Caucasian_Albanian -> BlockDefinition (0x10530, 0x1056f) "Caucasian Albanian" - Vithkuqi -> BlockDefinition (0x10570, 0x105bf) "Vithkuqi" - Linear_A -> BlockDefinition (0x10600, 0x1077f) "Linear A" - Latin_Extended_F -> BlockDefinition (0x10780, 0x107bf) "Latin Extended-F" - Cypriot_Syllabary -> BlockDefinition (0x10800, 0x1083f) "Cypriot Syllabary" - Imperial_Aramaic -> BlockDefinition (0x10840, 0x1085f) "Imperial Aramaic" - Palmyrene -> BlockDefinition (0x10860, 0x1087f) "Palmyrene" - Nabataean -> BlockDefinition (0x10880, 0x108af) "Nabataean" - Hatran -> BlockDefinition (0x108e0, 0x108ff) "Hatran" - Phoenician -> BlockDefinition (0x10900, 0x1091f) "Phoenician" - Lydian -> BlockDefinition (0x10920, 0x1093f) "Lydian" - Meroitic_Hieroglyphs -> BlockDefinition (0x10980, 0x1099f) "Meroitic Hieroglyphs" - Meroitic_Cursive -> BlockDefinition (0x109a0, 0x109ff) "Meroitic Cursive" - Kharoshthi -> BlockDefinition (0x10a00, 0x10a5f) "Kharoshthi" - Old_South_Arabian -> BlockDefinition (0x10a60, 0x10a7f) "Old South Arabian" - Old_North_Arabian -> BlockDefinition (0x10a80, 0x10a9f) "Old North Arabian" - Manichaean -> BlockDefinition (0x10ac0, 0x10aff) "Manichaean" - Avestan -> BlockDefinition (0x10b00, 0x10b3f) "Avestan" - Inscriptional_Parthian -> BlockDefinition (0x10b40, 0x10b5f) "Inscriptional Parthian" - Inscriptional_Pahlavi -> BlockDefinition (0x10b60, 0x10b7f) "Inscriptional Pahlavi" - Psalter_Pahlavi -> BlockDefinition (0x10b80, 0x10baf) "Psalter Pahlavi" - Old_Turkic -> BlockDefinition (0x10c00, 0x10c4f) "Old Turkic" - Old_Hungarian -> BlockDefinition (0x10c80, 0x10cff) "Old Hungarian" - Hanifi_Rohingya -> BlockDefinition (0x10d00, 0x10d3f) "Hanifi Rohingya" - Rumi_Numeral_Symbols -> BlockDefinition (0x10e60, 0x10e7f) "Rumi Numeral Symbols" - Yezidi -> BlockDefinition (0x10e80, 0x10ebf) "Yezidi" - Old_Sogdian -> BlockDefinition (0x10f00, 0x10f2f) "Old Sogdian" - Sogdian -> BlockDefinition (0x10f30, 0x10f6f) "Sogdian" - Old_Uyghur -> BlockDefinition (0x10f70, 0x10faf) "Old Uyghur" - Chorasmian -> BlockDefinition (0x10fb0, 0x10fdf) "Chorasmian" - Elymaic -> BlockDefinition (0x10fe0, 0x10fff) "Elymaic" - Brahmi -> BlockDefinition (0x11000, 0x1107f) "Brahmi" - Kaithi -> BlockDefinition (0x11080, 0x110cf) "Kaithi" - Sora_Sompeng -> BlockDefinition (0x110d0, 0x110ff) "Sora Sompeng" - Chakma -> BlockDefinition (0x11100, 0x1114f) "Chakma" - Mahajani -> BlockDefinition (0x11150, 0x1117f) "Mahajani" - Sharada -> BlockDefinition (0x11180, 0x111df) "Sharada" - Sinhala_Archaic_Numbers -> BlockDefinition (0x111e0, 0x111ff) "Sinhala Archaic Numbers" - Khojki -> BlockDefinition (0x11200, 0x1124f) "Khojki" - Multani -> BlockDefinition (0x11280, 0x112af) "Multani" - Khudawadi -> BlockDefinition (0x112b0, 0x112ff) "Khudawadi" - Grantha -> BlockDefinition (0x11300, 0x1137f) "Grantha" - Newa -> BlockDefinition (0x11400, 0x1147f) "Newa" - Tirhuta -> BlockDefinition (0x11480, 0x114df) "Tirhuta" - Siddham -> BlockDefinition (0x11580, 0x115ff) "Siddham" - Modi -> BlockDefinition (0x11600, 0x1165f) "Modi" - Mongolian_Supplement -> BlockDefinition (0x11660, 0x1167f) "Mongolian Supplement" - Takri -> BlockDefinition (0x11680, 0x116cf) "Takri" - Ahom -> BlockDefinition (0x11700, 0x1174f) "Ahom" - Dogra -> BlockDefinition (0x11800, 0x1184f) "Dogra" - Warang_Citi -> BlockDefinition (0x118a0, 0x118ff) "Warang Citi" - Dives_Akuru -> BlockDefinition (0x11900, 0x1195f) "Dives Akuru" - Nandinagari -> BlockDefinition (0x119a0, 0x119ff) "Nandinagari" - Zanabazar_Square -> BlockDefinition (0x11a00, 0x11a4f) "Zanabazar Square" - Soyombo -> BlockDefinition (0x11a50, 0x11aaf) "Soyombo" - Unified_Canadian_Aboriginal_Syllabics_Extended_A -> BlockDefinition (0x11ab0, 0x11abf) "Unified Canadian Aboriginal Syllabics Extended-A" - Pau_Cin_Hau -> BlockDefinition (0x11ac0, 0x11aff) "Pau Cin Hau" - Bhaiksuki -> BlockDefinition (0x11c00, 0x11c6f) "Bhaiksuki" - Marchen -> BlockDefinition (0x11c70, 0x11cbf) "Marchen" - Masaram_Gondi -> BlockDefinition (0x11d00, 0x11d5f) "Masaram Gondi" - Gunjala_Gondi -> BlockDefinition (0x11d60, 0x11daf) "Gunjala Gondi" - Makasar -> BlockDefinition (0x11ee0, 0x11eff) "Makasar" - Lisu_Supplement -> BlockDefinition (0x11fb0, 0x11fbf) "Lisu Supplement" - Tamil_Supplement -> BlockDefinition (0x11fc0, 0x11fff) "Tamil Supplement" - Cuneiform -> BlockDefinition (0x12000, 0x123ff) "Cuneiform" - Cuneiform_Numbers_and_Punctuation -> BlockDefinition (0x12400, 0x1247f) "Cuneiform Numbers and Punctuation" - Early_Dynastic_Cuneiform -> BlockDefinition (0x12480, 0x1254f) "Early Dynastic Cuneiform" - Cypro_Minoan -> BlockDefinition (0x12f90, 0x12fff) "Cypro-Minoan" - Egyptian_Hieroglyphs -> BlockDefinition (0x13000, 0x1342f) "Egyptian Hieroglyphs" - Egyptian_Hieroglyph_Format_Controls -> BlockDefinition (0x13430, 0x1343f) "Egyptian Hieroglyph Format Controls" - Anatolian_Hieroglyphs -> BlockDefinition (0x14400, 0x1467f) "Anatolian Hieroglyphs" - Bamum_Supplement -> BlockDefinition (0x16800, 0x16a3f) "Bamum Supplement" - Mro -> BlockDefinition (0x16a40, 0x16a6f) "Mro" - Tangsa -> BlockDefinition (0x16a70, 0x16acf) "Tangsa" - Bassa_Vah -> BlockDefinition (0x16ad0, 0x16aff) "Bassa Vah" - Pahawh_Hmong -> BlockDefinition (0x16b00, 0x16b8f) "Pahawh Hmong" - Medefaidrin -> BlockDefinition (0x16e40, 0x16e9f) "Medefaidrin" - Miao -> BlockDefinition (0x16f00, 0x16f9f) "Miao" - Ideographic_Symbols_and_Punctuation -> BlockDefinition (0x16fe0, 0x16fff) "Ideographic Symbols and Punctuation" - Tangut -> BlockDefinition (0x17000, 0x187ff) "Tangut" - Tangut_Components -> BlockDefinition (0x18800, 0x18aff) "Tangut Components" - Khitan_Small_Script -> BlockDefinition (0x18b00, 0x18cff) "Khitan Small Script" - Tangut_Supplement -> BlockDefinition (0x18d00, 0x18d7f) "Tangut Supplement" - Kana_Extended_B -> BlockDefinition (0x1aff0, 0x1afff) "Kana Extended-B" - Kana_Supplement -> BlockDefinition (0x1b000, 0x1b0ff) "Kana Supplement" - Kana_Extended_A -> BlockDefinition (0x1b100, 0x1b12f) "Kana Extended-A" - Small_Kana_Extension -> BlockDefinition (0x1b130, 0x1b16f) "Small Kana Extension" - Nushu -> BlockDefinition (0x1b170, 0x1b2ff) "Nushu" - Duployan -> BlockDefinition (0x1bc00, 0x1bc9f) "Duployan" - Shorthand_Format_Controls -> BlockDefinition (0x1bca0, 0x1bcaf) "Shorthand Format Controls" - Znamenny_Musical_Notation -> BlockDefinition (0x1cf00, 0x1cfcf) "Znamenny Musical Notation" - Byzantine_Musical_Symbols -> BlockDefinition (0x1d000, 0x1d0ff) "Byzantine Musical Symbols" - Musical_Symbols -> BlockDefinition (0x1d100, 0x1d1ff) "Musical Symbols" - Ancient_Greek_Musical_Notation -> BlockDefinition (0x1d200, 0x1d24f) "Ancient Greek Musical Notation" - Mayan_Numerals -> BlockDefinition (0x1d2e0, 0x1d2ff) "Mayan Numerals" - Tai_Xuan_Jing_Symbols -> BlockDefinition (0x1d300, 0x1d35f) "Tai Xuan Jing Symbols" - Counting_Rod_Numerals -> BlockDefinition (0x1d360, 0x1d37f) "Counting Rod Numerals" - Mathematical_Alphanumeric_Symbols -> BlockDefinition (0x1d400, 0x1d7ff) "Mathematical Alphanumeric Symbols" - Sutton_SignWriting -> BlockDefinition (0x1d800, 0x1daaf) "Sutton SignWriting" - Latin_Extended_G -> BlockDefinition (0x1df00, 0x1dfff) "Latin Extended-G" - Glagolitic_Supplement -> BlockDefinition (0x1e000, 0x1e02f) "Glagolitic Supplement" - Nyiakeng_Puachue_Hmong -> BlockDefinition (0x1e100, 0x1e14f) "Nyiakeng Puachue Hmong" - Toto -> BlockDefinition (0x1e290, 0x1e2bf) "Toto" - Wancho -> BlockDefinition (0x1e2c0, 0x1e2ff) "Wancho" - Ethiopic_Extended_B -> BlockDefinition (0x1e7e0, 0x1e7ff) "Ethiopic Extended-B" - Mende_Kikakui -> BlockDefinition (0x1e800, 0x1e8df) "Mende Kikakui" - Adlam -> BlockDefinition (0x1e900, 0x1e95f) "Adlam" - Indic_Siyaq_Numbers -> BlockDefinition (0x1ec70, 0x1ecbf) "Indic Siyaq Numbers" - Ottoman_Siyaq_Numbers -> BlockDefinition (0x1ed00, 0x1ed4f) "Ottoman Siyaq Numbers" - Arabic_Mathematical_Alphabetic_Symbols -> BlockDefinition (0x1ee00, 0x1eeff) "Arabic Mathematical Alphabetic Symbols" - Mahjong_Tiles -> BlockDefinition (0x1f000, 0x1f02f) "Mahjong Tiles" - Domino_Tiles -> BlockDefinition (0x1f030, 0x1f09f) "Domino Tiles" - Playing_Cards -> BlockDefinition (0x1f0a0, 0x1f0ff) "Playing Cards" - Enclosed_Alphanumeric_Supplement -> BlockDefinition (0x1f100, 0x1f1ff) "Enclosed Alphanumeric Supplement" - Enclosed_Ideographic_Supplement -> BlockDefinition (0x1f200, 0x1f2ff) "Enclosed Ideographic Supplement" - Miscellaneous_Symbols_and_Pictographs -> BlockDefinition (0x1f300, 0x1f5ff) "Miscellaneous Symbols and Pictographs" - Emoticons -> BlockDefinition (0x1f600, 0x1f64f) "Emoticons" - Ornamental_Dingbats -> BlockDefinition (0x1f650, 0x1f67f) "Ornamental Dingbats" - Transport_and_Map_Symbols -> BlockDefinition (0x1f680, 0x1f6ff) "Transport and Map Symbols" - Alchemical_Symbols -> BlockDefinition (0x1f700, 0x1f77f) "Alchemical Symbols" - Geometric_Shapes_Extended -> BlockDefinition (0x1f780, 0x1f7ff) "Geometric Shapes Extended" - Supplemental_Arrows_C -> BlockDefinition (0x1f800, 0x1f8ff) "Supplemental Arrows-C" - Supplemental_Symbols_and_Pictographs -> BlockDefinition (0x1f900, 0x1f9ff) "Supplemental Symbols and Pictographs" - Chess_Symbols -> BlockDefinition (0x1fa00, 0x1fa6f) "Chess Symbols" - Symbols_and_Pictographs_Extended_A -> BlockDefinition (0x1fa70, 0x1faff) "Symbols and Pictographs Extended-A" - Symbols_for_Legacy_Computing -> BlockDefinition (0x1fb00, 0x1fbff) "Symbols for Legacy Computing" - CJK_Unified_Ideographs_Extension_B -> BlockDefinition (0x20000, 0x2a6df) "CJK Unified Ideographs Extension B" - CJK_Unified_Ideographs_Extension_C -> BlockDefinition (0x2a700, 0x2b73f) "CJK Unified Ideographs Extension C" - CJK_Unified_Ideographs_Extension_D -> BlockDefinition (0x2b740, 0x2b81f) "CJK Unified Ideographs Extension D" - CJK_Unified_Ideographs_Extension_E -> BlockDefinition (0x2b820, 0x2ceaf) "CJK Unified Ideographs Extension E" - CJK_Unified_Ideographs_Extension_F -> BlockDefinition (0x2ceb0, 0x2ebef) "CJK Unified Ideographs Extension F" - CJK_Compatibility_Ideographs_Supplement -> BlockDefinition (0x2f800, 0x2fa1f) "CJK Compatibility Ideographs Supplement" - CJK_Unified_Ideographs_Extension_G -> BlockDefinition (0x30000, 0x3134f) "CJK Unified Ideographs Extension G" - Tags -> BlockDefinition (0xe0000, 0xe007f) "Tags" - Variation_Selectors_Supplement -> BlockDefinition (0xe0100, 0xe01ef) "Variation Selectors Supplement" - Supplementary_Private_Use_Area_A -> BlockDefinition (0xf0000, 0xfffff) "Supplementary Private Use Area-A" - Supplementary_Private_Use_Area_B -> BlockDefinition (0x100000, 0x10ffff) "Supplementary Private Use Area-B" + Basic_Latin -> BlockDefinition (0x0000, 0x007f) "Basic Latin" + Latin_1_Supplement -> BlockDefinition (0x0080, 0x00ff) "Latin-1 Supplement" + Latin_Extended_A -> BlockDefinition (0x0100, 0x017f) "Latin Extended-A" + Latin_Extended_B -> BlockDefinition (0x0180, 0x024f) "Latin Extended-B" + IPA_Extensions -> BlockDefinition (0x0250, 0x02af) "IPA Extensions" + Spacing_Modifier_Letters -> BlockDefinition (0x02b0, 0x02ff) "Spacing Modifier Letters" + Combining_Diacritical_Marks -> BlockDefinition (0x0300, 0x036f) "Combining Diacritical Marks" + Greek_and_Coptic -> BlockDefinition (0x0370, 0x03ff) "Greek and Coptic" + Cyrillic -> BlockDefinition (0x0400, 0x04ff) "Cyrillic" + Cyrillic_Supplement -> BlockDefinition (0x0500, 0x052f) "Cyrillic Supplement" + Armenian -> BlockDefinition (0x0530, 0x058f) "Armenian" + Hebrew -> BlockDefinition (0x0590, 0x05ff) "Hebrew" + Arabic -> BlockDefinition (0x0600, 0x06ff) "Arabic" + Syriac -> BlockDefinition (0x0700, 0x074f) "Syriac" + Arabic_Supplement -> BlockDefinition (0x0750, 0x077f) "Arabic Supplement" + Thaana -> BlockDefinition (0x0780, 0x07bf) "Thaana" + NKo -> BlockDefinition (0x07c0, 0x07ff) "NKo" + Samaritan -> BlockDefinition (0x0800, 0x083f) "Samaritan" + Mandaic -> BlockDefinition (0x0840, 0x085f) "Mandaic" + Syriac_Supplement -> BlockDefinition (0x0860, 0x086f) "Syriac Supplement" + Arabic_Extended_B -> BlockDefinition (0x0870, 0x089f) "Arabic Extended-B" + Arabic_Extended_A -> BlockDefinition (0x08a0, 0x08ff) "Arabic Extended-A" + Devanagari -> BlockDefinition (0x0900, 0x097f) "Devanagari" + Bengali -> BlockDefinition (0x0980, 0x09ff) "Bengali" + Gurmukhi -> BlockDefinition (0x0a00, 0x0a7f) "Gurmukhi" + Gujarati -> BlockDefinition (0x0a80, 0x0aff) "Gujarati" + Oriya -> BlockDefinition (0x0b00, 0x0b7f) "Oriya" + Tamil -> BlockDefinition (0x0b80, 0x0bff) "Tamil" + Telugu -> BlockDefinition (0x0c00, 0x0c7f) "Telugu" + Kannada -> BlockDefinition (0x0c80, 0x0cff) "Kannada" + Malayalam -> BlockDefinition (0x0d00, 0x0d7f) "Malayalam" + Sinhala -> BlockDefinition (0x0d80, 0x0dff) "Sinhala" + Thai -> BlockDefinition (0x0e00, 0x0e7f) "Thai" + Lao -> BlockDefinition (0x0e80, 0x0eff) "Lao" + Tibetan -> BlockDefinition (0x0f00, 0x0fff) "Tibetan" + Myanmar -> BlockDefinition (0x1000, 0x109f) "Myanmar" + Georgian -> BlockDefinition (0x10a0, 0x10ff) "Georgian" + Hangul_Jamo -> BlockDefinition (0x1100, 0x11ff) "Hangul Jamo" + Ethiopic -> BlockDefinition (0x1200, 0x137f) "Ethiopic" + Ethiopic_Supplement -> BlockDefinition (0x1380, 0x139f) "Ethiopic Supplement" + Cherokee -> BlockDefinition (0x13a0, 0x13ff) "Cherokee" + Unified_Canadian_Aboriginal_Syllabics -> BlockDefinition (0x1400, 0x167f) "Unified Canadian Aboriginal Syllabics" + Ogham -> BlockDefinition (0x1680, 0x169f) "Ogham" + Runic -> BlockDefinition (0x16a0, 0x16ff) "Runic" + Tagalog -> BlockDefinition (0x1700, 0x171f) "Tagalog" + Hanunoo -> BlockDefinition (0x1720, 0x173f) "Hanunoo" + Buhid -> BlockDefinition (0x1740, 0x175f) "Buhid" + Tagbanwa -> BlockDefinition (0x1760, 0x177f) "Tagbanwa" + Khmer -> BlockDefinition (0x1780, 0x17ff) "Khmer" + Mongolian -> BlockDefinition (0x1800, 0x18af) "Mongolian" + Unified_Canadian_Aboriginal_Syllabics_Extended -> BlockDefinition (0x18b0, 0x18ff) "Unified Canadian Aboriginal Syllabics Extended" + Limbu -> BlockDefinition (0x1900, 0x194f) "Limbu" + Tai_Le -> BlockDefinition (0x1950, 0x197f) "Tai Le" + New_Tai_Lue -> BlockDefinition (0x1980, 0x19df) "New Tai Lue" + Khmer_Symbols -> BlockDefinition (0x19e0, 0x19ff) "Khmer Symbols" + Buginese -> BlockDefinition (0x1a00, 0x1a1f) "Buginese" + Tai_Tham -> BlockDefinition (0x1a20, 0x1aaf) "Tai Tham" + Combining_Diacritical_Marks_Extended -> BlockDefinition (0x1ab0, 0x1aff) "Combining Diacritical Marks Extended" + Balinese -> BlockDefinition (0x1b00, 0x1b7f) "Balinese" + Sundanese -> BlockDefinition (0x1b80, 0x1bbf) "Sundanese" + Batak -> BlockDefinition (0x1bc0, 0x1bff) "Batak" + Lepcha -> BlockDefinition (0x1c00, 0x1c4f) "Lepcha" + Ol_Chiki -> BlockDefinition (0x1c50, 0x1c7f) "Ol Chiki" + Cyrillic_Extended_C -> BlockDefinition (0x1c80, 0x1c8f) "Cyrillic Extended-C" + Georgian_Extended -> BlockDefinition (0x1c90, 0x1cbf) "Georgian Extended" + Sundanese_Supplement -> BlockDefinition (0x1cc0, 0x1ccf) "Sundanese Supplement" + Vedic_Extensions -> BlockDefinition (0x1cd0, 0x1cff) "Vedic Extensions" + Phonetic_Extensions -> BlockDefinition (0x1d00, 0x1d7f) "Phonetic Extensions" + Phonetic_Extensions_Supplement -> BlockDefinition (0x1d80, 0x1dbf) "Phonetic Extensions Supplement" + Combining_Diacritical_Marks_Supplement -> BlockDefinition (0x1dc0, 0x1dff) "Combining Diacritical Marks Supplement" + Latin_Extended_Additional -> BlockDefinition (0x1e00, 0x1eff) "Latin Extended Additional" + Greek_Extended -> BlockDefinition (0x1f00, 0x1fff) "Greek Extended" + General_Punctuation -> BlockDefinition (0x2000, 0x206f) "General Punctuation" + Superscripts_and_Subscripts -> BlockDefinition (0x2070, 0x209f) "Superscripts and Subscripts" + Currency_Symbols -> BlockDefinition (0x20a0, 0x20cf) "Currency Symbols" + Combining_Diacritical_Marks_for_Symbols -> BlockDefinition (0x20d0, 0x20ff) "Combining Diacritical Marks for Symbols" + Letterlike_Symbols -> BlockDefinition (0x2100, 0x214f) "Letterlike Symbols" + Number_Forms -> BlockDefinition (0x2150, 0x218f) "Number Forms" + Arrows -> BlockDefinition (0x2190, 0x21ff) "Arrows" + Mathematical_Operators -> BlockDefinition (0x2200, 0x22ff) "Mathematical Operators" + Miscellaneous_Technical -> BlockDefinition (0x2300, 0x23ff) "Miscellaneous Technical" + Control_Pictures -> BlockDefinition (0x2400, 0x243f) "Control Pictures" + Optical_Character_Recognition -> BlockDefinition (0x2440, 0x245f) "Optical Character Recognition" + Enclosed_Alphanumerics -> BlockDefinition (0x2460, 0x24ff) "Enclosed Alphanumerics" + Box_Drawing -> BlockDefinition (0x2500, 0x257f) "Box Drawing" + Block_Elements -> BlockDefinition (0x2580, 0x259f) "Block Elements" + Geometric_Shapes -> BlockDefinition (0x25a0, 0x25ff) "Geometric Shapes" + Miscellaneous_Symbols -> BlockDefinition (0x2600, 0x26ff) "Miscellaneous Symbols" + Dingbats -> BlockDefinition (0x2700, 0x27bf) "Dingbats" + Miscellaneous_Mathematical_Symbols_A -> BlockDefinition (0x27c0, 0x27ef) "Miscellaneous Mathematical Symbols-A" + Supplemental_Arrows_A -> BlockDefinition (0x27f0, 0x27ff) "Supplemental Arrows-A" + Braille_Patterns -> BlockDefinition (0x2800, 0x28ff) "Braille Patterns" + Supplemental_Arrows_B -> BlockDefinition (0x2900, 0x297f) "Supplemental Arrows-B" + Miscellaneous_Mathematical_Symbols_B -> BlockDefinition (0x2980, 0x29ff) "Miscellaneous Mathematical Symbols-B" + Supplemental_Mathematical_Operators -> BlockDefinition (0x2a00, 0x2aff) "Supplemental Mathematical Operators" + Miscellaneous_Symbols_and_Arrows -> BlockDefinition (0x2b00, 0x2bff) "Miscellaneous Symbols and Arrows" + Glagolitic -> BlockDefinition (0x2c00, 0x2c5f) "Glagolitic" + Latin_Extended_C -> BlockDefinition (0x2c60, 0x2c7f) "Latin Extended-C" + Coptic -> BlockDefinition (0x2c80, 0x2cff) "Coptic" + Georgian_Supplement -> BlockDefinition (0x2d00, 0x2d2f) "Georgian Supplement" + Tifinagh -> BlockDefinition (0x2d30, 0x2d7f) "Tifinagh" + Ethiopic_Extended -> BlockDefinition (0x2d80, 0x2ddf) "Ethiopic Extended" + Cyrillic_Extended_A -> BlockDefinition (0x2de0, 0x2dff) "Cyrillic Extended-A" + Supplemental_Punctuation -> BlockDefinition (0x2e00, 0x2e7f) "Supplemental Punctuation" + CJK_Radicals_Supplement -> BlockDefinition (0x2e80, 0x2eff) "CJK Radicals Supplement" + Kangxi_Radicals -> BlockDefinition (0x2f00, 0x2fdf) "Kangxi Radicals" + Ideographic_Description_Characters -> BlockDefinition (0x2ff0, 0x2fff) "Ideographic Description Characters" + CJK_Symbols_and_Punctuation -> BlockDefinition (0x3000, 0x303f) "CJK Symbols and Punctuation" + Hiragana -> BlockDefinition (0x3040, 0x309f) "Hiragana" + Katakana -> BlockDefinition (0x30a0, 0x30ff) "Katakana" + Bopomofo -> BlockDefinition (0x3100, 0x312f) "Bopomofo" + Hangul_Compatibility_Jamo -> BlockDefinition (0x3130, 0x318f) "Hangul Compatibility Jamo" + Kanbun -> BlockDefinition (0x3190, 0x319f) "Kanbun" + Bopomofo_Extended -> BlockDefinition (0x31a0, 0x31bf) "Bopomofo Extended" + CJK_Strokes -> BlockDefinition (0x31c0, 0x31ef) "CJK Strokes" + Katakana_Phonetic_Extensions -> BlockDefinition (0x31f0, 0x31ff) "Katakana Phonetic Extensions" + Enclosed_CJK_Letters_and_Months -> BlockDefinition (0x3200, 0x32ff) "Enclosed CJK Letters and Months" + CJK_Compatibility -> BlockDefinition (0x3300, 0x33ff) "CJK Compatibility" + CJK_Unified_Ideographs_Extension_A -> BlockDefinition (0x3400, 0x4dbf) "CJK Unified Ideographs Extension A" + Yijing_Hexagram_Symbols -> BlockDefinition (0x4dc0, 0x4dff) "Yijing Hexagram Symbols" + CJK_Unified_Ideographs -> BlockDefinition (0x4e00, 0x9fff) "CJK Unified Ideographs" + Yi_Syllables -> BlockDefinition (0xa000, 0xa48f) "Yi Syllables" + Yi_Radicals -> BlockDefinition (0xa490, 0xa4cf) "Yi Radicals" + Lisu -> BlockDefinition (0xa4d0, 0xa4ff) "Lisu" + Vai -> BlockDefinition (0xa500, 0xa63f) "Vai" + Cyrillic_Extended_B -> BlockDefinition (0xa640, 0xa69f) "Cyrillic Extended-B" + Bamum -> BlockDefinition (0xa6a0, 0xa6ff) "Bamum" + Modifier_Tone_Letters -> BlockDefinition (0xa700, 0xa71f) "Modifier Tone Letters" + Latin_Extended_D -> BlockDefinition (0xa720, 0xa7ff) "Latin Extended-D" + Syloti_Nagri -> BlockDefinition (0xa800, 0xa82f) "Syloti Nagri" + Common_Indic_Number_Forms -> BlockDefinition (0xa830, 0xa83f) "Common Indic Number Forms" + Phags_pa -> BlockDefinition (0xa840, 0xa87f) "Phags-pa" + Saurashtra -> BlockDefinition (0xa880, 0xa8df) "Saurashtra" + Devanagari_Extended -> BlockDefinition (0xa8e0, 0xa8ff) "Devanagari Extended" + Kayah_Li -> BlockDefinition (0xa900, 0xa92f) "Kayah Li" + Rejang -> BlockDefinition (0xa930, 0xa95f) "Rejang" + Hangul_Jamo_Extended_A -> BlockDefinition (0xa960, 0xa97f) "Hangul Jamo Extended-A" + Javanese -> BlockDefinition (0xa980, 0xa9df) "Javanese" + Myanmar_Extended_B -> BlockDefinition (0xa9e0, 0xa9ff) "Myanmar Extended-B" + Cham -> BlockDefinition (0xaa00, 0xaa5f) "Cham" + Myanmar_Extended_A -> BlockDefinition (0xaa60, 0xaa7f) "Myanmar Extended-A" + Tai_Viet -> BlockDefinition (0xaa80, 0xaadf) "Tai Viet" + Meetei_Mayek_Extensions -> BlockDefinition (0xaae0, 0xaaff) "Meetei Mayek Extensions" + Ethiopic_Extended_A -> BlockDefinition (0xab00, 0xab2f) "Ethiopic Extended-A" + Latin_Extended_E -> BlockDefinition (0xab30, 0xab6f) "Latin Extended-E" + Cherokee_Supplement -> BlockDefinition (0xab70, 0xabbf) "Cherokee Supplement" + Meetei_Mayek -> BlockDefinition (0xabc0, 0xabff) "Meetei Mayek" + Hangul_Syllables -> BlockDefinition (0xac00, 0xd7af) "Hangul Syllables" + Hangul_Jamo_Extended_B -> BlockDefinition (0xd7b0, 0xd7ff) "Hangul Jamo Extended-B" + High_Surrogates -> BlockDefinition (0xd800, 0xdb7f) "High Surrogates" + High_Private_Use_Surrogates -> BlockDefinition (0xdb80, 0xdbff) "High Private Use Surrogates" + Low_Surrogates -> BlockDefinition (0xdc00, 0xdfff) "Low Surrogates" + Private_Use_Area -> BlockDefinition (0xe000, 0xf8ff) "Private Use Area" + CJK_Compatibility_Ideographs -> BlockDefinition (0xf900, 0xfaff) "CJK Compatibility Ideographs" + Alphabetic_Presentation_Forms -> BlockDefinition (0xfb00, 0xfb4f) "Alphabetic Presentation Forms" + Arabic_Presentation_Forms_A -> BlockDefinition (0xfb50, 0xfdff) "Arabic Presentation Forms-A" + Variation_Selectors -> BlockDefinition (0xfe00, 0xfe0f) "Variation Selectors" + Vertical_Forms -> BlockDefinition (0xfe10, 0xfe1f) "Vertical Forms" + Combining_Half_Marks -> BlockDefinition (0xfe20, 0xfe2f) "Combining Half Marks" + CJK_Compatibility_Forms -> BlockDefinition (0xfe30, 0xfe4f) "CJK Compatibility Forms" + Small_Form_Variants -> BlockDefinition (0xfe50, 0xfe6f) "Small Form Variants" + Arabic_Presentation_Forms_B -> BlockDefinition (0xfe70, 0xfeff) "Arabic Presentation Forms-B" + Halfwidth_and_Fullwidth_Forms -> BlockDefinition (0xff00, 0xffef) "Halfwidth and Fullwidth Forms" + Specials -> BlockDefinition (0xfff0, 0xffff) "Specials" + Linear_B_Syllabary -> BlockDefinition (0x10000, 0x1007f) "Linear B Syllabary" + Linear_B_Ideograms -> BlockDefinition (0x10080, 0x100ff) "Linear B Ideograms" + Aegean_Numbers -> BlockDefinition (0x10100, 0x1013f) "Aegean Numbers" + Ancient_Greek_Numbers -> BlockDefinition (0x10140, 0x1018f) "Ancient Greek Numbers" + Ancient_Symbols -> BlockDefinition (0x10190, 0x101cf) "Ancient Symbols" + Phaistos_Disc -> BlockDefinition (0x101d0, 0x101ff) "Phaistos Disc" + Lycian -> BlockDefinition (0x10280, 0x1029f) "Lycian" + Carian -> BlockDefinition (0x102a0, 0x102df) "Carian" + Coptic_Epact_Numbers -> BlockDefinition (0x102e0, 0x102ff) "Coptic Epact Numbers" + Old_Italic -> BlockDefinition (0x10300, 0x1032f) "Old Italic" + Gothic -> BlockDefinition (0x10330, 0x1034f) "Gothic" + Old_Permic -> BlockDefinition (0x10350, 0x1037f) "Old Permic" + Ugaritic -> BlockDefinition (0x10380, 0x1039f) "Ugaritic" + Old_Persian -> BlockDefinition (0x103a0, 0x103df) "Old Persian" + Deseret -> BlockDefinition (0x10400, 0x1044f) "Deseret" + Shavian -> BlockDefinition (0x10450, 0x1047f) "Shavian" + Osmanya -> BlockDefinition (0x10480, 0x104af) "Osmanya" + Osage -> BlockDefinition (0x104b0, 0x104ff) "Osage" + Elbasan -> BlockDefinition (0x10500, 0x1052f) "Elbasan" + Caucasian_Albanian -> BlockDefinition (0x10530, 0x1056f) "Caucasian Albanian" + Vithkuqi -> BlockDefinition (0x10570, 0x105bf) "Vithkuqi" + Linear_A -> BlockDefinition (0x10600, 0x1077f) "Linear A" + Latin_Extended_F -> BlockDefinition (0x10780, 0x107bf) "Latin Extended-F" + Cypriot_Syllabary -> BlockDefinition (0x10800, 0x1083f) "Cypriot Syllabary" + Imperial_Aramaic -> BlockDefinition (0x10840, 0x1085f) "Imperial Aramaic" + Palmyrene -> BlockDefinition (0x10860, 0x1087f) "Palmyrene" + Nabataean -> BlockDefinition (0x10880, 0x108af) "Nabataean" + Hatran -> BlockDefinition (0x108e0, 0x108ff) "Hatran" + Phoenician -> BlockDefinition (0x10900, 0x1091f) "Phoenician" + Lydian -> BlockDefinition (0x10920, 0x1093f) "Lydian" + Meroitic_Hieroglyphs -> BlockDefinition (0x10980, 0x1099f) "Meroitic Hieroglyphs" + Meroitic_Cursive -> BlockDefinition (0x109a0, 0x109ff) "Meroitic Cursive" + Kharoshthi -> BlockDefinition (0x10a00, 0x10a5f) "Kharoshthi" + Old_South_Arabian -> BlockDefinition (0x10a60, 0x10a7f) "Old South Arabian" + Old_North_Arabian -> BlockDefinition (0x10a80, 0x10a9f) "Old North Arabian" + Manichaean -> BlockDefinition (0x10ac0, 0x10aff) "Manichaean" + Avestan -> BlockDefinition (0x10b00, 0x10b3f) "Avestan" + Inscriptional_Parthian -> BlockDefinition (0x10b40, 0x10b5f) "Inscriptional Parthian" + Inscriptional_Pahlavi -> BlockDefinition (0x10b60, 0x10b7f) "Inscriptional Pahlavi" + Psalter_Pahlavi -> BlockDefinition (0x10b80, 0x10baf) "Psalter Pahlavi" + Old_Turkic -> BlockDefinition (0x10c00, 0x10c4f) "Old Turkic" + Old_Hungarian -> BlockDefinition (0x10c80, 0x10cff) "Old Hungarian" + Hanifi_Rohingya -> BlockDefinition (0x10d00, 0x10d3f) "Hanifi Rohingya" + Rumi_Numeral_Symbols -> BlockDefinition (0x10e60, 0x10e7f) "Rumi Numeral Symbols" + Yezidi -> BlockDefinition (0x10e80, 0x10ebf) "Yezidi" + Old_Sogdian -> BlockDefinition (0x10f00, 0x10f2f) "Old Sogdian" + Sogdian -> BlockDefinition (0x10f30, 0x10f6f) "Sogdian" + Old_Uyghur -> BlockDefinition (0x10f70, 0x10faf) "Old Uyghur" + Chorasmian -> BlockDefinition (0x10fb0, 0x10fdf) "Chorasmian" + Elymaic -> BlockDefinition (0x10fe0, 0x10fff) "Elymaic" + Brahmi -> BlockDefinition (0x11000, 0x1107f) "Brahmi" + Kaithi -> BlockDefinition (0x11080, 0x110cf) "Kaithi" + Sora_Sompeng -> BlockDefinition (0x110d0, 0x110ff) "Sora Sompeng" + Chakma -> BlockDefinition (0x11100, 0x1114f) "Chakma" + Mahajani -> BlockDefinition (0x11150, 0x1117f) "Mahajani" + Sharada -> BlockDefinition (0x11180, 0x111df) "Sharada" + Sinhala_Archaic_Numbers -> BlockDefinition (0x111e0, 0x111ff) "Sinhala Archaic Numbers" + Khojki -> BlockDefinition (0x11200, 0x1124f) "Khojki" + Multani -> BlockDefinition (0x11280, 0x112af) "Multani" + Khudawadi -> BlockDefinition (0x112b0, 0x112ff) "Khudawadi" + Grantha -> BlockDefinition (0x11300, 0x1137f) "Grantha" + Newa -> BlockDefinition (0x11400, 0x1147f) "Newa" + Tirhuta -> BlockDefinition (0x11480, 0x114df) "Tirhuta" + Siddham -> BlockDefinition (0x11580, 0x115ff) "Siddham" + Modi -> BlockDefinition (0x11600, 0x1165f) "Modi" + Mongolian_Supplement -> BlockDefinition (0x11660, 0x1167f) "Mongolian Supplement" + Takri -> BlockDefinition (0x11680, 0x116cf) "Takri" + Ahom -> BlockDefinition (0x11700, 0x1174f) "Ahom" + Dogra -> BlockDefinition (0x11800, 0x1184f) "Dogra" + Warang_Citi -> BlockDefinition (0x118a0, 0x118ff) "Warang Citi" + Dives_Akuru -> BlockDefinition (0x11900, 0x1195f) "Dives Akuru" + Nandinagari -> BlockDefinition (0x119a0, 0x119ff) "Nandinagari" + Zanabazar_Square -> BlockDefinition (0x11a00, 0x11a4f) "Zanabazar Square" + Soyombo -> BlockDefinition (0x11a50, 0x11aaf) "Soyombo" + Unified_Canadian_Aboriginal_Syllabics_Extended_A -> BlockDefinition (0x11ab0, 0x11abf) "Unified Canadian Aboriginal Syllabics Extended-A" + Pau_Cin_Hau -> BlockDefinition (0x11ac0, 0x11aff) "Pau Cin Hau" + Bhaiksuki -> BlockDefinition (0x11c00, 0x11c6f) "Bhaiksuki" + Marchen -> BlockDefinition (0x11c70, 0x11cbf) "Marchen" + Masaram_Gondi -> BlockDefinition (0x11d00, 0x11d5f) "Masaram Gondi" + Gunjala_Gondi -> BlockDefinition (0x11d60, 0x11daf) "Gunjala Gondi" + Makasar -> BlockDefinition (0x11ee0, 0x11eff) "Makasar" + Lisu_Supplement -> BlockDefinition (0x11fb0, 0x11fbf) "Lisu Supplement" + Tamil_Supplement -> BlockDefinition (0x11fc0, 0x11fff) "Tamil Supplement" + Cuneiform -> BlockDefinition (0x12000, 0x123ff) "Cuneiform" + Cuneiform_Numbers_and_Punctuation -> BlockDefinition (0x12400, 0x1247f) "Cuneiform Numbers and Punctuation" + Early_Dynastic_Cuneiform -> BlockDefinition (0x12480, 0x1254f) "Early Dynastic Cuneiform" + Cypro_Minoan -> BlockDefinition (0x12f90, 0x12fff) "Cypro-Minoan" + Egyptian_Hieroglyphs -> BlockDefinition (0x13000, 0x1342f) "Egyptian Hieroglyphs" + Egyptian_Hieroglyph_Format_Controls -> BlockDefinition (0x13430, 0x1343f) "Egyptian Hieroglyph Format Controls" + Anatolian_Hieroglyphs -> BlockDefinition (0x14400, 0x1467f) "Anatolian Hieroglyphs" + Bamum_Supplement -> BlockDefinition (0x16800, 0x16a3f) "Bamum Supplement" + Mro -> BlockDefinition (0x16a40, 0x16a6f) "Mro" + Tangsa -> BlockDefinition (0x16a70, 0x16acf) "Tangsa" + Bassa_Vah -> BlockDefinition (0x16ad0, 0x16aff) "Bassa Vah" + Pahawh_Hmong -> BlockDefinition (0x16b00, 0x16b8f) "Pahawh Hmong" + Medefaidrin -> BlockDefinition (0x16e40, 0x16e9f) "Medefaidrin" + Miao -> BlockDefinition (0x16f00, 0x16f9f) "Miao" + Ideographic_Symbols_and_Punctuation -> BlockDefinition (0x16fe0, 0x16fff) "Ideographic Symbols and Punctuation" + Tangut -> BlockDefinition (0x17000, 0x187ff) "Tangut" + Tangut_Components -> BlockDefinition (0x18800, 0x18aff) "Tangut Components" + Khitan_Small_Script -> BlockDefinition (0x18b00, 0x18cff) "Khitan Small Script" + Tangut_Supplement -> BlockDefinition (0x18d00, 0x18d7f) "Tangut Supplement" + Kana_Extended_B -> BlockDefinition (0x1aff0, 0x1afff) "Kana Extended-B" + Kana_Supplement -> BlockDefinition (0x1b000, 0x1b0ff) "Kana Supplement" + Kana_Extended_A -> BlockDefinition (0x1b100, 0x1b12f) "Kana Extended-A" + Small_Kana_Extension -> BlockDefinition (0x1b130, 0x1b16f) "Small Kana Extension" + Nushu -> BlockDefinition (0x1b170, 0x1b2ff) "Nushu" + Duployan -> BlockDefinition (0x1bc00, 0x1bc9f) "Duployan" + Shorthand_Format_Controls -> BlockDefinition (0x1bca0, 0x1bcaf) "Shorthand Format Controls" + Znamenny_Musical_Notation -> BlockDefinition (0x1cf00, 0x1cfcf) "Znamenny Musical Notation" + Byzantine_Musical_Symbols -> BlockDefinition (0x1d000, 0x1d0ff) "Byzantine Musical Symbols" + Musical_Symbols -> BlockDefinition (0x1d100, 0x1d1ff) "Musical Symbols" + Ancient_Greek_Musical_Notation -> BlockDefinition (0x1d200, 0x1d24f) "Ancient Greek Musical Notation" + Mayan_Numerals -> BlockDefinition (0x1d2e0, 0x1d2ff) "Mayan Numerals" + Tai_Xuan_Jing_Symbols -> BlockDefinition (0x1d300, 0x1d35f) "Tai Xuan Jing Symbols" + Counting_Rod_Numerals -> BlockDefinition (0x1d360, 0x1d37f) "Counting Rod Numerals" + Mathematical_Alphanumeric_Symbols -> BlockDefinition (0x1d400, 0x1d7ff) "Mathematical Alphanumeric Symbols" + Sutton_SignWriting -> BlockDefinition (0x1d800, 0x1daaf) "Sutton SignWriting" + Latin_Extended_G -> BlockDefinition (0x1df00, 0x1dfff) "Latin Extended-G" + Glagolitic_Supplement -> BlockDefinition (0x1e000, 0x1e02f) "Glagolitic Supplement" + Nyiakeng_Puachue_Hmong -> BlockDefinition (0x1e100, 0x1e14f) "Nyiakeng Puachue Hmong" + Toto -> BlockDefinition (0x1e290, 0x1e2bf) "Toto" + Wancho -> BlockDefinition (0x1e2c0, 0x1e2ff) "Wancho" + Ethiopic_Extended_B -> BlockDefinition (0x1e7e0, 0x1e7ff) "Ethiopic Extended-B" + Mende_Kikakui -> BlockDefinition (0x1e800, 0x1e8df) "Mende Kikakui" + Adlam -> BlockDefinition (0x1e900, 0x1e95f) "Adlam" + Indic_Siyaq_Numbers -> BlockDefinition (0x1ec70, 0x1ecbf) "Indic Siyaq Numbers" + Ottoman_Siyaq_Numbers -> BlockDefinition (0x1ed00, 0x1ed4f) "Ottoman Siyaq Numbers" + Arabic_Mathematical_Alphabetic_Symbols -> BlockDefinition (0x1ee00, 0x1eeff) "Arabic Mathematical Alphabetic Symbols" + Mahjong_Tiles -> BlockDefinition (0x1f000, 0x1f02f) "Mahjong Tiles" + Domino_Tiles -> BlockDefinition (0x1f030, 0x1f09f) "Domino Tiles" + Playing_Cards -> BlockDefinition (0x1f0a0, 0x1f0ff) "Playing Cards" + Enclosed_Alphanumeric_Supplement -> BlockDefinition (0x1f100, 0x1f1ff) "Enclosed Alphanumeric Supplement" + Enclosed_Ideographic_Supplement -> BlockDefinition (0x1f200, 0x1f2ff) "Enclosed Ideographic Supplement" + Miscellaneous_Symbols_and_Pictographs -> BlockDefinition (0x1f300, 0x1f5ff) "Miscellaneous Symbols and Pictographs" + Emoticons -> BlockDefinition (0x1f600, 0x1f64f) "Emoticons" + Ornamental_Dingbats -> BlockDefinition (0x1f650, 0x1f67f) "Ornamental Dingbats" + Transport_and_Map_Symbols -> BlockDefinition (0x1f680, 0x1f6ff) "Transport and Map Symbols" + Alchemical_Symbols -> BlockDefinition (0x1f700, 0x1f77f) "Alchemical Symbols" + Geometric_Shapes_Extended -> BlockDefinition (0x1f780, 0x1f7ff) "Geometric Shapes Extended" + Supplemental_Arrows_C -> BlockDefinition (0x1f800, 0x1f8ff) "Supplemental Arrows-C" + Supplemental_Symbols_and_Pictographs -> BlockDefinition (0x1f900, 0x1f9ff) "Supplemental Symbols and Pictographs" + Chess_Symbols -> BlockDefinition (0x1fa00, 0x1fa6f) "Chess Symbols" + Symbols_and_Pictographs_Extended_A -> BlockDefinition (0x1fa70, 0x1faff) "Symbols and Pictographs Extended-A" + Symbols_for_Legacy_Computing -> BlockDefinition (0x1fb00, 0x1fbff) "Symbols for Legacy Computing" + CJK_Unified_Ideographs_Extension_B -> BlockDefinition (0x20000, 0x2a6df) "CJK Unified Ideographs Extension B" + CJK_Unified_Ideographs_Extension_C -> BlockDefinition (0x2a700, 0x2b73f) "CJK Unified Ideographs Extension C" + CJK_Unified_Ideographs_Extension_D -> BlockDefinition (0x2b740, 0x2b81f) "CJK Unified Ideographs Extension D" + CJK_Unified_Ideographs_Extension_E -> BlockDefinition (0x2b820, 0x2ceaf) "CJK Unified Ideographs Extension E" + CJK_Unified_Ideographs_Extension_F -> BlockDefinition (0x2ceb0, 0x2ebef) "CJK Unified Ideographs Extension F" + CJK_Compatibility_Ideographs_Supplement -> BlockDefinition (0x2f800, 0x2fa1f) "CJK Compatibility Ideographs Supplement" + CJK_Unified_Ideographs_Extension_G -> BlockDefinition (0x30000, 0x3134f) "CJK Unified Ideographs Extension G" + Tags -> BlockDefinition (0xe0000, 0xe007f) "Tags" + Variation_Selectors_Supplement -> BlockDefinition (0xe0100, 0xe01ef) "Variation Selectors Supplement" + Supplementary_Private_Use_Area_A -> BlockDefinition (0xf0000, 0xfffff) "Supplementary Private Use Area-A" + Supplementary_Private_Use_Area_B -> BlockDefinition (0x100000, 0x10ffff) "Supplementary Private Use Area-B" + +-- [TODO] @since +-- | All the block ranges, in ascending order. +{-# INLINE allBlockRanges #-} +allBlockRanges :: [(Int, Int)] +allBlockRanges = + [(0,127),(128,255),(256,383),(384,591),(592,687),(688,767),(768,879),(880,1023),(1024,1279),(1280,1327),(1328,1423),(1424,1535),(1536,1791),(1792,1871),(1872,1919),(1920,1983),(1984,2047),(2048,2111),(2112,2143),(2144,2159),(2160,2207),(2208,2303),(2304,2431),(2432,2559),(2560,2687),(2688,2815),(2816,2943),(2944,3071),(3072,3199),(3200,3327),(3328,3455),(3456,3583),(3584,3711),(3712,3839),(3840,4095),(4096,4255),(4256,4351),(4352,4607),(4608,4991),(4992,5023),(5024,5119),(5120,5759),(5760,5791),(5792,5887),(5888,5919),(5920,5951),(5952,5983),(5984,6015),(6016,6143),(6144,6319),(6320,6399),(6400,6479),(6480,6527),(6528,6623),(6624,6655),(6656,6687),(6688,6831),(6832,6911),(6912,7039),(7040,7103),(7104,7167),(7168,7247),(7248,7295),(7296,7311),(7312,7359),(7360,7375),(7376,7423),(7424,7551),(7552,7615),(7616,7679),(7680,7935),(7936,8191),(8192,8303),(8304,8351),(8352,8399),(8400,8447),(8448,8527),(8528,8591),(8592,8703),(8704,8959),(8960,9215),(9216,9279),(9280,9311),(9312,9471),(9472,9599),(9600,9631),(9632,9727),(9728,9983),(9984,10175),(10176,10223),(10224,10239),(10240,10495),(10496,10623),(10624,10751),(10752,11007),(11008,11263),(11264,11359),(11360,11391),(11392,11519),(11520,11567),(11568,11647),(11648,11743),(11744,11775),(11776,11903),(11904,12031),(12032,12255),(12272,12287),(12288,12351),(12352,12447),(12448,12543),(12544,12591),(12592,12687),(12688,12703),(12704,12735),(12736,12783),(12784,12799),(12800,13055),(13056,13311),(13312,19903),(19904,19967),(19968,40959),(40960,42127),(42128,42191),(42192,42239),(42240,42559),(42560,42655),(42656,42751),(42752,42783),(42784,43007),(43008,43055),(43056,43071),(43072,43135),(43136,43231),(43232,43263),(43264,43311),(43312,43359),(43360,43391),(43392,43487),(43488,43519),(43520,43615),(43616,43647),(43648,43743),(43744,43775),(43776,43823),(43824,43887),(43888,43967),(43968,44031),(44032,55215),(55216,55295),(55296,56191),(56192,56319),(56320,57343),(57344,63743),(63744,64255),(64256,64335),(64336,65023),(65024,65039),(65040,65055),(65056,65071),(65072,65103),(65104,65135),(65136,65279),(65280,65519),(65520,65535),(65536,65663),(65664,65791),(65792,65855),(65856,65935),(65936,65999),(66000,66047),(66176,66207),(66208,66271),(66272,66303),(66304,66351),(66352,66383),(66384,66431),(66432,66463),(66464,66527),(66560,66639),(66640,66687),(66688,66735),(66736,66815),(66816,66863),(66864,66927),(66928,67007),(67072,67455),(67456,67519),(67584,67647),(67648,67679),(67680,67711),(67712,67759),(67808,67839),(67840,67871),(67872,67903),(67968,67999),(68000,68095),(68096,68191),(68192,68223),(68224,68255),(68288,68351),(68352,68415),(68416,68447),(68448,68479),(68480,68527),(68608,68687),(68736,68863),(68864,68927),(69216,69247),(69248,69311),(69376,69423),(69424,69487),(69488,69551),(69552,69599),(69600,69631),(69632,69759),(69760,69839),(69840,69887),(69888,69967),(69968,70015),(70016,70111),(70112,70143),(70144,70223),(70272,70319),(70320,70399),(70400,70527),(70656,70783),(70784,70879),(71040,71167),(71168,71263),(71264,71295),(71296,71375),(71424,71503),(71680,71759),(71840,71935),(71936,72031),(72096,72191),(72192,72271),(72272,72367),(72368,72383),(72384,72447),(72704,72815),(72816,72895),(72960,73055),(73056,73135),(73440,73471),(73648,73663),(73664,73727),(73728,74751),(74752,74879),(74880,75087),(77712,77823),(77824,78895),(78896,78911),(82944,83583),(92160,92735),(92736,92783),(92784,92879),(92880,92927),(92928,93071),(93760,93855),(93952,94111),(94176,94207),(94208,100351),(100352,101119),(101120,101631),(101632,101759),(110576,110591),(110592,110847),(110848,110895),(110896,110959),(110960,111359),(113664,113823),(113824,113839),(118528,118735),(118784,119039),(119040,119295),(119296,119375),(119520,119551),(119552,119647),(119648,119679),(119808,120831),(120832,121519),(122624,122879),(122880,122927),(123136,123215),(123536,123583),(123584,123647),(124896,124927),(124928,125151),(125184,125279),(126064,126143),(126208,126287),(126464,126719),(126976,127023),(127024,127135),(127136,127231),(127232,127487),(127488,127743),(127744,128511),(128512,128591),(128592,128639),(128640,128767),(128768,128895),(128896,129023),(129024,129279),(129280,129535),(129536,129647),(129648,129791),(129792,130047),(131072,173791),(173824,177983),(177984,178207),(178208,183983),(183984,191471),(194560,195103),(196608,201551),(917504,917631),(917760,917999),(983040,1048575),(1048576,1114111)] -- [TODO] @since -- | Character block, if defined. -block :: Char -> Maybe Block -block c - | cp <= 0x7f = Just Basic_Latin - | cp <= 0xff = Just Latin_1_Supplement - | cp <= 0x17f = Just Latin_Extended_A - | cp <= 0x24f = Just Latin_Extended_B - | cp <= 0x2af = Just IPA_Extensions - | cp <= 0x2ff = Just Spacing_Modifier_Letters - | cp <= 0x36f = Just Combining_Diacritical_Marks - | cp <= 0x3ff = Just Greek_and_Coptic - | cp <= 0x4ff = Just Cyrillic - | cp <= 0x52f = Just Cyrillic_Supplement - | cp <= 0x58f = Just Armenian - | cp <= 0x5ff = Just Hebrew - | cp <= 0x6ff = Just Arabic - | cp <= 0x74f = Just Syriac - | cp <= 0x77f = Just Arabic_Supplement - | cp <= 0x7bf = Just Thaana - | cp <= 0x7ff = Just NKo - | cp <= 0x83f = Just Samaritan - | cp <= 0x85f = Just Mandaic - | cp <= 0x86f = Just Syriac_Supplement - | cp <= 0x89f = Just Arabic_Extended_B - | cp <= 0x8ff = Just Arabic_Extended_A - | cp <= 0x97f = Just Devanagari - | cp <= 0x9ff = Just Bengali - | cp <= 0xa7f = Just Gurmukhi - | cp <= 0xaff = Just Gujarati - | cp <= 0xb7f = Just Oriya - | cp <= 0xbff = Just Tamil - | cp <= 0xc7f = Just Telugu - | cp <= 0xcff = Just Kannada - | cp <= 0xd7f = Just Malayalam - | cp <= 0xdff = Just Sinhala - | cp <= 0xe7f = Just Thai - | cp <= 0xeff = Just Lao - | cp <= 0xfff = Just Tibetan - | cp <= 0x109f = Just Myanmar - | cp <= 0x10ff = Just Georgian - | cp <= 0x11ff = Just Hangul_Jamo - | cp <= 0x137f = Just Ethiopic - | cp <= 0x139f = Just Ethiopic_Supplement - | cp <= 0x13ff = Just Cherokee - | cp <= 0x167f = Just Unified_Canadian_Aboriginal_Syllabics - | cp <= 0x169f = Just Ogham - | cp <= 0x16ff = Just Runic - | cp <= 0x171f = Just Tagalog - | cp <= 0x173f = Just Hanunoo - | cp <= 0x175f = Just Buhid - | cp <= 0x177f = Just Tagbanwa - | cp <= 0x17ff = Just Khmer - | cp <= 0x18af = Just Mongolian - | cp <= 0x18ff = Just Unified_Canadian_Aboriginal_Syllabics_Extended - | cp <= 0x194f = Just Limbu - | cp <= 0x197f = Just Tai_Le - | cp <= 0x19df = Just New_Tai_Lue - | cp <= 0x19ff = Just Khmer_Symbols - | cp <= 0x1a1f = Just Buginese - | cp <= 0x1aaf = Just Tai_Tham - | cp <= 0x1aff = Just Combining_Diacritical_Marks_Extended - | cp <= 0x1b7f = Just Balinese - | cp <= 0x1bbf = Just Sundanese - | cp <= 0x1bff = Just Batak - | cp <= 0x1c4f = Just Lepcha - | cp <= 0x1c7f = Just Ol_Chiki - | cp <= 0x1c8f = Just Cyrillic_Extended_C - | cp <= 0x1cbf = Just Georgian_Extended - | cp <= 0x1ccf = Just Sundanese_Supplement - | cp <= 0x1cff = Just Vedic_Extensions - | cp <= 0x1d7f = Just Phonetic_Extensions - | cp <= 0x1dbf = Just Phonetic_Extensions_Supplement - | cp <= 0x1dff = Just Combining_Diacritical_Marks_Supplement - | cp <= 0x1eff = Just Latin_Extended_Additional - | cp <= 0x1fff = Just Greek_Extended - | cp <= 0x206f = Just General_Punctuation - | cp <= 0x209f = Just Superscripts_and_Subscripts - | cp <= 0x20cf = Just Currency_Symbols - | cp <= 0x20ff = Just Combining_Diacritical_Marks_for_Symbols - | cp <= 0x214f = Just Letterlike_Symbols - | cp <= 0x218f = Just Number_Forms - | cp <= 0x21ff = Just Arrows - | cp <= 0x22ff = Just Mathematical_Operators - | cp <= 0x23ff = Just Miscellaneous_Technical - | cp <= 0x243f = Just Control_Pictures - | cp <= 0x245f = Just Optical_Character_Recognition - | cp <= 0x24ff = Just Enclosed_Alphanumerics - | cp <= 0x257f = Just Box_Drawing - | cp <= 0x259f = Just Block_Elements - | cp <= 0x25ff = Just Geometric_Shapes - | cp <= 0x26ff = Just Miscellaneous_Symbols - | cp <= 0x27bf = Just Dingbats - | cp <= 0x27ef = Just Miscellaneous_Mathematical_Symbols_A - | cp <= 0x27ff = Just Supplemental_Arrows_A - | cp <= 0x28ff = Just Braille_Patterns - | cp <= 0x297f = Just Supplemental_Arrows_B - | cp <= 0x29ff = Just Miscellaneous_Mathematical_Symbols_B - | cp <= 0x2aff = Just Supplemental_Mathematical_Operators - | cp <= 0x2bff = Just Miscellaneous_Symbols_and_Arrows - | cp <= 0x2c5f = Just Glagolitic - | cp <= 0x2c7f = Just Latin_Extended_C - | cp <= 0x2cff = Just Coptic - | cp <= 0x2d2f = Just Georgian_Supplement - | cp <= 0x2d7f = Just Tifinagh - | cp <= 0x2ddf = Just Ethiopic_Extended - | cp <= 0x2dff = Just Cyrillic_Extended_A - | cp <= 0x2e7f = Just Supplemental_Punctuation - | cp <= 0x2eff = Just CJK_Radicals_Supplement - | cp <= 0x2fdf = Just Kangxi_Radicals - | cp <= 0x2fef = Nothing - | cp <= 0x2fff = Just Ideographic_Description_Characters - | cp <= 0x303f = Just CJK_Symbols_and_Punctuation - | cp <= 0x309f = Just Hiragana - | cp <= 0x30ff = Just Katakana - | cp <= 0x312f = Just Bopomofo - | cp <= 0x318f = Just Hangul_Compatibility_Jamo - | cp <= 0x319f = Just Kanbun - | cp <= 0x31bf = Just Bopomofo_Extended - | cp <= 0x31ef = Just CJK_Strokes - | cp <= 0x31ff = Just Katakana_Phonetic_Extensions - | cp <= 0x32ff = Just Enclosed_CJK_Letters_and_Months - | cp <= 0x33ff = Just CJK_Compatibility - | cp <= 0x4dbf = Just CJK_Unified_Ideographs_Extension_A - | cp <= 0x4dff = Just Yijing_Hexagram_Symbols - | cp <= 0x9fff = Just CJK_Unified_Ideographs - | cp <= 0xa48f = Just Yi_Syllables - | cp <= 0xa4cf = Just Yi_Radicals - | cp <= 0xa4ff = Just Lisu - | cp <= 0xa63f = Just Vai - | cp <= 0xa69f = Just Cyrillic_Extended_B - | cp <= 0xa6ff = Just Bamum - | cp <= 0xa71f = Just Modifier_Tone_Letters - | cp <= 0xa7ff = Just Latin_Extended_D - | cp <= 0xa82f = Just Syloti_Nagri - | cp <= 0xa83f = Just Common_Indic_Number_Forms - | cp <= 0xa87f = Just Phags_pa - | cp <= 0xa8df = Just Saurashtra - | cp <= 0xa8ff = Just Devanagari_Extended - | cp <= 0xa92f = Just Kayah_Li - | cp <= 0xa95f = Just Rejang - | cp <= 0xa97f = Just Hangul_Jamo_Extended_A - | cp <= 0xa9df = Just Javanese - | cp <= 0xa9ff = Just Myanmar_Extended_B - | cp <= 0xaa5f = Just Cham - | cp <= 0xaa7f = Just Myanmar_Extended_A - | cp <= 0xaadf = Just Tai_Viet - | cp <= 0xaaff = Just Meetei_Mayek_Extensions - | cp <= 0xab2f = Just Ethiopic_Extended_A - | cp <= 0xab6f = Just Latin_Extended_E - | cp <= 0xabbf = Just Cherokee_Supplement - | cp <= 0xabff = Just Meetei_Mayek - | cp <= 0xd7af = Just Hangul_Syllables - | cp <= 0xd7ff = Just Hangul_Jamo_Extended_B - | cp <= 0xdb7f = Just High_Surrogates - | cp <= 0xdbff = Just High_Private_Use_Surrogates - | cp <= 0xdfff = Just Low_Surrogates - | cp <= 0xf8ff = Just Private_Use_Area - | cp <= 0xfaff = Just CJK_Compatibility_Ideographs - | cp <= 0xfb4f = Just Alphabetic_Presentation_Forms - | cp <= 0xfdff = Just Arabic_Presentation_Forms_A - | cp <= 0xfe0f = Just Variation_Selectors - | cp <= 0xfe1f = Just Vertical_Forms - | cp <= 0xfe2f = Just Combining_Half_Marks - | cp <= 0xfe4f = Just CJK_Compatibility_Forms - | cp <= 0xfe6f = Just Small_Form_Variants - | cp <= 0xfeff = Just Arabic_Presentation_Forms_B - | cp <= 0xffef = Just Halfwidth_and_Fullwidth_Forms - | cp <= 0xffff = Just Specials - | cp <= 0x1007f = Just Linear_B_Syllabary - | cp <= 0x100ff = Just Linear_B_Ideograms - | cp <= 0x1013f = Just Aegean_Numbers - | cp <= 0x1018f = Just Ancient_Greek_Numbers - | cp <= 0x101cf = Just Ancient_Symbols - | cp <= 0x101ff = Just Phaistos_Disc - | cp <= 0x1027f = Nothing - | cp <= 0x1029f = Just Lycian - | cp <= 0x102df = Just Carian - | cp <= 0x102ff = Just Coptic_Epact_Numbers - | cp <= 0x1032f = Just Old_Italic - | cp <= 0x1034f = Just Gothic - | cp <= 0x1037f = Just Old_Permic - | cp <= 0x1039f = Just Ugaritic - | cp <= 0x103df = Just Old_Persian - | cp <= 0x103ff = Nothing - | cp <= 0x1044f = Just Deseret - | cp <= 0x1047f = Just Shavian - | cp <= 0x104af = Just Osmanya - | cp <= 0x104ff = Just Osage - | cp <= 0x1052f = Just Elbasan - | cp <= 0x1056f = Just Caucasian_Albanian - | cp <= 0x105bf = Just Vithkuqi - | cp <= 0x105ff = Nothing - | cp <= 0x1077f = Just Linear_A - | cp <= 0x107bf = Just Latin_Extended_F - | cp <= 0x107ff = Nothing - | cp <= 0x1083f = Just Cypriot_Syllabary - | cp <= 0x1085f = Just Imperial_Aramaic - | cp <= 0x1087f = Just Palmyrene - | cp <= 0x108af = Just Nabataean - | cp <= 0x108df = Nothing - | cp <= 0x108ff = Just Hatran - | cp <= 0x1091f = Just Phoenician - | cp <= 0x1093f = Just Lydian - | cp <= 0x1097f = Nothing - | cp <= 0x1099f = Just Meroitic_Hieroglyphs - | cp <= 0x109ff = Just Meroitic_Cursive - | cp <= 0x10a5f = Just Kharoshthi - | cp <= 0x10a7f = Just Old_South_Arabian - | cp <= 0x10a9f = Just Old_North_Arabian - | cp <= 0x10abf = Nothing - | cp <= 0x10aff = Just Manichaean - | cp <= 0x10b3f = Just Avestan - | cp <= 0x10b5f = Just Inscriptional_Parthian - | cp <= 0x10b7f = Just Inscriptional_Pahlavi - | cp <= 0x10baf = Just Psalter_Pahlavi - | cp <= 0x10bff = Nothing - | cp <= 0x10c4f = Just Old_Turkic - | cp <= 0x10c7f = Nothing - | cp <= 0x10cff = Just Old_Hungarian - | cp <= 0x10d3f = Just Hanifi_Rohingya - | cp <= 0x10e5f = Nothing - | cp <= 0x10e7f = Just Rumi_Numeral_Symbols - | cp <= 0x10ebf = Just Yezidi - | cp <= 0x10eff = Nothing - | cp <= 0x10f2f = Just Old_Sogdian - | cp <= 0x10f6f = Just Sogdian - | cp <= 0x10faf = Just Old_Uyghur - | cp <= 0x10fdf = Just Chorasmian - | cp <= 0x10fff = Just Elymaic - | cp <= 0x1107f = Just Brahmi - | cp <= 0x110cf = Just Kaithi - | cp <= 0x110ff = Just Sora_Sompeng - | cp <= 0x1114f = Just Chakma - | cp <= 0x1117f = Just Mahajani - | cp <= 0x111df = Just Sharada - | cp <= 0x111ff = Just Sinhala_Archaic_Numbers - | cp <= 0x1124f = Just Khojki - | cp <= 0x1127f = Nothing - | cp <= 0x112af = Just Multani - | cp <= 0x112ff = Just Khudawadi - | cp <= 0x1137f = Just Grantha - | cp <= 0x113ff = Nothing - | cp <= 0x1147f = Just Newa - | cp <= 0x114df = Just Tirhuta - | cp <= 0x1157f = Nothing - | cp <= 0x115ff = Just Siddham - | cp <= 0x1165f = Just Modi - | cp <= 0x1167f = Just Mongolian_Supplement - | cp <= 0x116cf = Just Takri - | cp <= 0x116ff = Nothing - | cp <= 0x1174f = Just Ahom - | cp <= 0x117ff = Nothing - | cp <= 0x1184f = Just Dogra - | cp <= 0x1189f = Nothing - | cp <= 0x118ff = Just Warang_Citi - | cp <= 0x1195f = Just Dives_Akuru - | cp <= 0x1199f = Nothing - | cp <= 0x119ff = Just Nandinagari - | cp <= 0x11a4f = Just Zanabazar_Square - | cp <= 0x11aaf = Just Soyombo - | cp <= 0x11abf = Just Unified_Canadian_Aboriginal_Syllabics_Extended_A - | cp <= 0x11aff = Just Pau_Cin_Hau - | cp <= 0x11bff = Nothing - | cp <= 0x11c6f = Just Bhaiksuki - | cp <= 0x11cbf = Just Marchen - | cp <= 0x11cff = Nothing - | cp <= 0x11d5f = Just Masaram_Gondi - | cp <= 0x11daf = Just Gunjala_Gondi - | cp <= 0x11edf = Nothing - | cp <= 0x11eff = Just Makasar - | cp <= 0x11faf = Nothing - | cp <= 0x11fbf = Just Lisu_Supplement - | cp <= 0x11fff = Just Tamil_Supplement - | cp <= 0x123ff = Just Cuneiform - | cp <= 0x1247f = Just Cuneiform_Numbers_and_Punctuation - | cp <= 0x1254f = Just Early_Dynastic_Cuneiform - | cp <= 0x12f8f = Nothing - | cp <= 0x12fff = Just Cypro_Minoan - | cp <= 0x1342f = Just Egyptian_Hieroglyphs - | cp <= 0x1343f = Just Egyptian_Hieroglyph_Format_Controls - | cp <= 0x143ff = Nothing - | cp <= 0x1467f = Just Anatolian_Hieroglyphs - | cp <= 0x167ff = Nothing - | cp <= 0x16a3f = Just Bamum_Supplement - | cp <= 0x16a6f = Just Mro - | cp <= 0x16acf = Just Tangsa - | cp <= 0x16aff = Just Bassa_Vah - | cp <= 0x16b8f = Just Pahawh_Hmong - | cp <= 0x16e3f = Nothing - | cp <= 0x16e9f = Just Medefaidrin - | cp <= 0x16eff = Nothing - | cp <= 0x16f9f = Just Miao - | cp <= 0x16fdf = Nothing - | cp <= 0x16fff = Just Ideographic_Symbols_and_Punctuation - | cp <= 0x187ff = Just Tangut - | cp <= 0x18aff = Just Tangut_Components - | cp <= 0x18cff = Just Khitan_Small_Script - | cp <= 0x18d7f = Just Tangut_Supplement - | cp <= 0x1afef = Nothing - | cp <= 0x1afff = Just Kana_Extended_B - | cp <= 0x1b0ff = Just Kana_Supplement - | cp <= 0x1b12f = Just Kana_Extended_A - | cp <= 0x1b16f = Just Small_Kana_Extension - | cp <= 0x1b2ff = Just Nushu - | cp <= 0x1bbff = Nothing - | cp <= 0x1bc9f = Just Duployan - | cp <= 0x1bcaf = Just Shorthand_Format_Controls - | cp <= 0x1ceff = Nothing - | cp <= 0x1cfcf = Just Znamenny_Musical_Notation - | cp <= 0x1cfff = Nothing - | cp <= 0x1d0ff = Just Byzantine_Musical_Symbols - | cp <= 0x1d1ff = Just Musical_Symbols - | cp <= 0x1d24f = Just Ancient_Greek_Musical_Notation - | cp <= 0x1d2df = Nothing - | cp <= 0x1d2ff = Just Mayan_Numerals - | cp <= 0x1d35f = Just Tai_Xuan_Jing_Symbols - | cp <= 0x1d37f = Just Counting_Rod_Numerals - | cp <= 0x1d3ff = Nothing - | cp <= 0x1d7ff = Just Mathematical_Alphanumeric_Symbols - | cp <= 0x1daaf = Just Sutton_SignWriting - | cp <= 0x1deff = Nothing - | cp <= 0x1dfff = Just Latin_Extended_G - | cp <= 0x1e02f = Just Glagolitic_Supplement - | cp <= 0x1e0ff = Nothing - | cp <= 0x1e14f = Just Nyiakeng_Puachue_Hmong - | cp <= 0x1e28f = Nothing - | cp <= 0x1e2bf = Just Toto - | cp <= 0x1e2ff = Just Wancho - | cp <= 0x1e7df = Nothing - | cp <= 0x1e7ff = Just Ethiopic_Extended_B - | cp <= 0x1e8df = Just Mende_Kikakui - | cp <= 0x1e8ff = Nothing - | cp <= 0x1e95f = Just Adlam - | cp <= 0x1ec6f = Nothing - | cp <= 0x1ecbf = Just Indic_Siyaq_Numbers - | cp <= 0x1ecff = Nothing - | cp <= 0x1ed4f = Just Ottoman_Siyaq_Numbers - | cp <= 0x1edff = Nothing - | cp <= 0x1eeff = Just Arabic_Mathematical_Alphabetic_Symbols - | cp <= 0x1efff = Nothing - | cp <= 0x1f02f = Just Mahjong_Tiles - | cp <= 0x1f09f = Just Domino_Tiles - | cp <= 0x1f0ff = Just Playing_Cards - | cp <= 0x1f1ff = Just Enclosed_Alphanumeric_Supplement - | cp <= 0x1f2ff = Just Enclosed_Ideographic_Supplement - | cp <= 0x1f5ff = Just Miscellaneous_Symbols_and_Pictographs - | cp <= 0x1f64f = Just Emoticons - | cp <= 0x1f67f = Just Ornamental_Dingbats - | cp <= 0x1f6ff = Just Transport_and_Map_Symbols - | cp <= 0x1f77f = Just Alchemical_Symbols - | cp <= 0x1f7ff = Just Geometric_Shapes_Extended - | cp <= 0x1f8ff = Just Supplemental_Arrows_C - | cp <= 0x1f9ff = Just Supplemental_Symbols_and_Pictographs - | cp <= 0x1fa6f = Just Chess_Symbols - | cp <= 0x1faff = Just Symbols_and_Pictographs_Extended_A - | cp <= 0x1fbff = Just Symbols_for_Legacy_Computing - | cp <= 0x1ffff = Nothing - | cp <= 0x2a6df = Just CJK_Unified_Ideographs_Extension_B - | cp <= 0x2a6ff = Nothing - | cp <= 0x2b73f = Just CJK_Unified_Ideographs_Extension_C - | cp <= 0x2b81f = Just CJK_Unified_Ideographs_Extension_D - | cp <= 0x2ceaf = Just CJK_Unified_Ideographs_Extension_E - | cp <= 0x2ebef = Just CJK_Unified_Ideographs_Extension_F - | cp <= 0x2f7ff = Nothing - | cp <= 0x2fa1f = Just CJK_Compatibility_Ideographs_Supplement - | cp <= 0x2ffff = Nothing - | cp <= 0x3134f = Just CJK_Unified_Ideographs_Extension_G - | cp <= 0xdffff = Nothing - | cp <= 0xe007f = Just Tags - | cp <= 0xe00ff = Nothing - | cp <= 0xe01ef = Just Variation_Selectors_Supplement - | cp <= 0xeffff = Nothing - | cp <= 0xfffff = Just Supplementary_Private_Use_Area_A - | cp <= 0x10ffff = Just Supplementary_Private_Use_Area_B +block :: Char -> Maybe Int +block (C# c#) = getBlock 0# 319# + where + -- [NOTE] Encoding + -- A range is encoded as two LE Word32: + -- • First one is the lower bound, where the higher 11 bits are the block + -- index and the lower 21 bits are the codepoint. + -- • Second one is the upper bound, which correspond to the codepoint. + + cp# = int2Word# (ord# c#) + + -- Binary search + getBlock l# u# = if isTrue# (l# ># u#) + then Nothing + else + let k# = l# +# uncheckedIShiftRL# (u# -# l#) 1# + j# = k# `uncheckedIShiftL#` 1# + cpL0# = getRawCodePoint# j# + cpL# = cpL0# `and#` 0x1fffff## -- Mask for codepoint: [0..0x10fff] + cpU# = getRawCodePoint# (j# +# 1#) + in if isTrue# (cpU# `ltWord#` cp#) + -- cp > upper bound + then getBlock (k# +# 1#) u# + -- check lower bound + else if isTrue# (cp# `ltWord#` cpL#) + -- cp < lower bound + then getBlock l# (k# -# 1#) + -- cp in block: get block index + else let block# = cpL0# `uncheckedShiftRL#` 21# + in Just (I# (word2Int# block#)) + + getRawCodePoint# k# = +#ifdef WORDS_BIGENDIAN +#if MIN_VERSION_base(4,16,0) + byteSwap32# (word32ToWord# (indexWord32OffAddr# addr# k#)) +#else + byteSwap32# (indexWord32OffAddr# ranges# k#) +#endif +#elif MIN_VERSION_base(4,16,0) + word32ToWord# (indexWord32OffAddr# ranges# k#) +#else + indexWord32OffAddr# ranges# k# +#endif - | otherwise = Nothing - where cp = ord c + -- Encoded ranges + ranges# = "\0\0\0\0\127\0\0\0\128\0\32\0\255\0\0\0\0\1\64\0\127\1\0\0\128\1\96\0\79\2\0\0\80\2\128\0\175\2\0\0\176\2\160\0\255\2\0\0\0\3\192\0\111\3\0\0\112\3\224\0\255\3\0\0\0\4\0\1\255\4\0\0\0\5\32\1\47\5\0\0\48\5\64\1\143\5\0\0\144\5\96\1\255\5\0\0\0\6\128\1\255\6\0\0\0\7\160\1\79\7\0\0\80\7\192\1\127\7\0\0\128\7\224\1\191\7\0\0\192\7\0\2\255\7\0\0\0\8\32\2\63\8\0\0\64\8\64\2\95\8\0\0\96\8\96\2\111\8\0\0\112\8\128\2\159\8\0\0\160\8\160\2\255\8\0\0\0\9\192\2\127\9\0\0\128\9\224\2\255\9\0\0\0\10\0\3\127\10\0\0\128\10\32\3\255\10\0\0\0\11\64\3\127\11\0\0\128\11\96\3\255\11\0\0\0\12\128\3\127\12\0\0\128\12\160\3\255\12\0\0\0\13\192\3\127\13\0\0\128\13\224\3\255\13\0\0\0\14\0\4\127\14\0\0\128\14\32\4\255\14\0\0\0\15\64\4\255\15\0\0\0\16\96\4\159\16\0\0\160\16\128\4\255\16\0\0\0\17\160\4\255\17\0\0\0\18\192\4\127\19\0\0\128\19\224\4\159\19\0\0\160\19\0\5\255\19\0\0\0\20\32\5\127\22\0\0\128\22\64\5\159\22\0\0\160\22\96\5\255\22\0\0\0\23\128\5\31\23\0\0\32\23\160\5\63\23\0\0\64\23\192\5\95\23\0\0\96\23\224\5\127\23\0\0\128\23\0\6\255\23\0\0\0\24\32\6\175\24\0\0\176\24\64\6\255\24\0\0\0\25\96\6\79\25\0\0\80\25\128\6\127\25\0\0\128\25\160\6\223\25\0\0\224\25\192\6\255\25\0\0\0\26\224\6\31\26\0\0\32\26\0\7\175\26\0\0\176\26\32\7\255\26\0\0\0\27\64\7\127\27\0\0\128\27\96\7\191\27\0\0\192\27\128\7\255\27\0\0\0\28\160\7\79\28\0\0\80\28\192\7\127\28\0\0\128\28\224\7\143\28\0\0\144\28\0\8\191\28\0\0\192\28\32\8\207\28\0\0\208\28\64\8\255\28\0\0\0\29\96\8\127\29\0\0\128\29\128\8\191\29\0\0\192\29\160\8\255\29\0\0\0\30\192\8\255\30\0\0\0\31\224\8\255\31\0\0\0\32\0\9\111\32\0\0\112\32\32\9\159\32\0\0\160\32\64\9\207\32\0\0\208\32\96\9\255\32\0\0\0\33\128\9\79\33\0\0\80\33\160\9\143\33\0\0\144\33\192\9\255\33\0\0\0\34\224\9\255\34\0\0\0\35\0\10\255\35\0\0\0\36\32\10\63\36\0\0\64\36\64\10\95\36\0\0\96\36\96\10\255\36\0\0\0\37\128\10\127\37\0\0\128\37\160\10\159\37\0\0\160\37\192\10\255\37\0\0\0\38\224\10\255\38\0\0\0\39\0\11\191\39\0\0\192\39\32\11\239\39\0\0\240\39\64\11\255\39\0\0\0\40\96\11\255\40\0\0\0\41\128\11\127\41\0\0\128\41\160\11\255\41\0\0\0\42\192\11\255\42\0\0\0\43\224\11\255\43\0\0\0\44\0\12\95\44\0\0\96\44\32\12\127\44\0\0\128\44\64\12\255\44\0\0\0\45\96\12\47\45\0\0\48\45\128\12\127\45\0\0\128\45\160\12\223\45\0\0\224\45\192\12\255\45\0\0\0\46\224\12\127\46\0\0\128\46\0\13\255\46\0\0\0\47\32\13\223\47\0\0\240\47\64\13\255\47\0\0\0\48\96\13\63\48\0\0\64\48\128\13\159\48\0\0\160\48\160\13\255\48\0\0\0\49\192\13\47\49\0\0\48\49\224\13\143\49\0\0\144\49\0\14\159\49\0\0\160\49\32\14\191\49\0\0\192\49\64\14\239\49\0\0\240\49\96\14\255\49\0\0\0\50\128\14\255\50\0\0\0\51\160\14\255\51\0\0\0\52\192\14\191\77\0\0\192\77\224\14\255\77\0\0\0\78\0\15\255\159\0\0\0\160\32\15\143\164\0\0\144\164\64\15\207\164\0\0\208\164\96\15\255\164\0\0\0\165\128\15\63\166\0\0\64\166\160\15\159\166\0\0\160\166\192\15\255\166\0\0\0\167\224\15\31\167\0\0\32\167\0\16\255\167\0\0\0\168\32\16\47\168\0\0\48\168\64\16\63\168\0\0\64\168\96\16\127\168\0\0\128\168\128\16\223\168\0\0\224\168\160\16\255\168\0\0\0\169\192\16\47\169\0\0\48\169\224\16\95\169\0\0\96\169\0\17\127\169\0\0\128\169\32\17\223\169\0\0\224\169\64\17\255\169\0\0\0\170\96\17\95\170\0\0\96\170\128\17\127\170\0\0\128\170\160\17\223\170\0\0\224\170\192\17\255\170\0\0\0\171\224\17\47\171\0\0\48\171\0\18\111\171\0\0\112\171\32\18\191\171\0\0\192\171\64\18\255\171\0\0\0\172\96\18\175\215\0\0\176\215\128\18\255\215\0\0\0\216\160\18\127\219\0\0\128\219\192\18\255\219\0\0\0\220\224\18\255\223\0\0\0\224\0\19\255\248\0\0\0\249\32\19\255\250\0\0\0\251\64\19\79\251\0\0\80\251\96\19\255\253\0\0\0\254\128\19\15\254\0\0\16\254\160\19\31\254\0\0\32\254\192\19\47\254\0\0\48\254\224\19\79\254\0\0\80\254\0\20\111\254\0\0\112\254\32\20\255\254\0\0\0\255\64\20\239\255\0\0\240\255\96\20\255\255\0\0\0\0\129\20\127\0\1\0\128\0\161\20\255\0\1\0\0\1\193\20\63\1\1\0\64\1\225\20\143\1\1\0\144\1\1\21\207\1\1\0\208\1\33\21\255\1\1\0\128\2\65\21\159\2\1\0\160\2\97\21\223\2\1\0\224\2\129\21\255\2\1\0\0\3\161\21\47\3\1\0\48\3\193\21\79\3\1\0\80\3\225\21\127\3\1\0\128\3\1\22\159\3\1\0\160\3\33\22\223\3\1\0\0\4\65\22\79\4\1\0\80\4\97\22\127\4\1\0\128\4\129\22\175\4\1\0\176\4\161\22\255\4\1\0\0\5\193\22\47\5\1\0\48\5\225\22\111\5\1\0\112\5\1\23\191\5\1\0\0\6\33\23\127\7\1\0\128\7\65\23\191\7\1\0\0\8\97\23\63\8\1\0\64\8\129\23\95\8\1\0\96\8\161\23\127\8\1\0\128\8\193\23\175\8\1\0\224\8\225\23\255\8\1\0\0\9\1\24\31\9\1\0\32\9\33\24\63\9\1\0\128\9\65\24\159\9\1\0\160\9\97\24\255\9\1\0\0\10\129\24\95\10\1\0\96\10\161\24\127\10\1\0\128\10\193\24\159\10\1\0\192\10\225\24\255\10\1\0\0\11\1\25\63\11\1\0\64\11\33\25\95\11\1\0\96\11\65\25\127\11\1\0\128\11\97\25\175\11\1\0\0\12\129\25\79\12\1\0\128\12\161\25\255\12\1\0\0\13\193\25\63\13\1\0\96\14\225\25\127\14\1\0\128\14\1\26\191\14\1\0\0\15\33\26\47\15\1\0\48\15\65\26\111\15\1\0\112\15\97\26\175\15\1\0\176\15\129\26\223\15\1\0\224\15\161\26\255\15\1\0\0\16\193\26\127\16\1\0\128\16\225\26\207\16\1\0\208\16\1\27\255\16\1\0\0\17\33\27\79\17\1\0\80\17\65\27\127\17\1\0\128\17\97\27\223\17\1\0\224\17\129\27\255\17\1\0\0\18\161\27\79\18\1\0\128\18\193\27\175\18\1\0\176\18\225\27\255\18\1\0\0\19\1\28\127\19\1\0\0\20\33\28\127\20\1\0\128\20\65\28\223\20\1\0\128\21\97\28\255\21\1\0\0\22\129\28\95\22\1\0\96\22\161\28\127\22\1\0\128\22\193\28\207\22\1\0\0\23\225\28\79\23\1\0\0\24\1\29\79\24\1\0\160\24\33\29\255\24\1\0\0\25\65\29\95\25\1\0\160\25\97\29\255\25\1\0\0\26\129\29\79\26\1\0\80\26\161\29\175\26\1\0\176\26\193\29\191\26\1\0\192\26\225\29\255\26\1\0\0\28\1\30\111\28\1\0\112\28\33\30\191\28\1\0\0\29\65\30\95\29\1\0\96\29\97\30\175\29\1\0\224\30\129\30\255\30\1\0\176\31\161\30\191\31\1\0\192\31\193\30\255\31\1\0\0\32\225\30\255\35\1\0\0\36\1\31\127\36\1\0\128\36\33\31\79\37\1\0\144\47\65\31\255\47\1\0\0\48\97\31\47\52\1\0\48\52\129\31\63\52\1\0\0\68\161\31\127\70\1\0\0\104\193\31\63\106\1\0\64\106\225\31\111\106\1\0\112\106\1\32\207\106\1\0\208\106\33\32\255\106\1\0\0\107\65\32\143\107\1\0\64\110\97\32\159\110\1\0\0\111\129\32\159\111\1\0\224\111\161\32\255\111\1\0\0\112\193\32\255\135\1\0\0\136\225\32\255\138\1\0\0\139\1\33\255\140\1\0\0\141\33\33\127\141\1\0\240\175\65\33\255\175\1\0\0\176\97\33\255\176\1\0\0\177\129\33\47\177\1\0\48\177\161\33\111\177\1\0\112\177\193\33\255\178\1\0\0\188\225\33\159\188\1\0\160\188\1\34\175\188\1\0\0\207\33\34\207\207\1\0\0\208\65\34\255\208\1\0\0\209\97\34\255\209\1\0\0\210\129\34\79\210\1\0\224\210\161\34\255\210\1\0\0\211\193\34\95\211\1\0\96\211\225\34\127\211\1\0\0\212\1\35\255\215\1\0\0\216\33\35\175\218\1\0\0\223\65\35\255\223\1\0\0\224\97\35\47\224\1\0\0\225\129\35\79\225\1\0\144\226\161\35\191\226\1\0\192\226\193\35\255\226\1\0\224\231\225\35\255\231\1\0\0\232\1\36\223\232\1\0\0\233\33\36\95\233\1\0\112\236\65\36\191\236\1\0\0\237\97\36\79\237\1\0\0\238\129\36\255\238\1\0\0\240\161\36\47\240\1\0\48\240\193\36\159\240\1\0\160\240\225\36\255\240\1\0\0\241\1\37\255\241\1\0\0\242\33\37\255\242\1\0\0\243\65\37\255\245\1\0\0\246\97\37\79\246\1\0\80\246\129\37\127\246\1\0\128\246\161\37\255\246\1\0\0\247\193\37\127\247\1\0\128\247\225\37\255\247\1\0\0\248\1\38\255\248\1\0\0\249\33\38\255\249\1\0\0\250\65\38\111\250\1\0\112\250\97\38\255\250\1\0\0\251\129\38\255\251\1\0\0\0\162\38\223\166\2\0\0\167\194\38\63\183\2\0\64\183\226\38\31\184\2\0\32\184\2\39\175\206\2\0\176\206\34\39\239\235\2\0\0\248\66\39\31\250\2\0\0\0\99\39\79\19\3\0\0\0\142\39\127\0\14\0\0\1\174\39\239\1\14\0\0\0\207\39\255\255\15\0\0\0\240\39\255\255\16\0"#