Update haddock documentation

composewell · Jul 7, 2021 · a9bfbaa · a9bfbaa
1 parent aa272e7
commit a9bfbaa
Show file tree

Hide file tree

Showing 6 changed files with 180 additions and 81 deletions.
diff --git a/README.md b/README.md
@@ -15,14 +15,6 @@ any other packages or use cases.
 
 Please see the haddock documentation for reference documentation.
 
-## Module Structure
-
-The module structure under `Unicode.Char` is largely based on the
-["Property Index by Scope of Use" in Unicode® Standard Annex #44](https://www.unicode.org/reports/tr44/#Property_Index_Table).
-
-The module structure under `Unicode.Internal.Char` is largely based on
-the UCD file names from which the properties are generated.
-
 ## Unicode database version update
 
 To update the unicode version please update the version number in
@@ -42,7 +34,20 @@ files, run `ucd.sh generate` from the top level directory of the repo.
 $ ./ucd.sh generate
 ```
 
-## References
+## Running property doctests
+
+Temporarily add `QuickCheck` to the `build-depends` of the library.
+
+```
+$ cabal build
+$ cabal-docspec --check-properties --property-variables c
+```
+
+## Licensing
+
+`unicode-data` is an [open source](https://github.com/composewell/unicode-data)
+project available under a liberal [Apache-2.0 license](LICENSE).
+
+## Contributing to unicode-data
 
-* See https://www.unicode.org/reports/tr44/ to understand what the unicode
-  database files contain and their formats.
+As an open project we welcome contributions.
diff --git a/lib/Unicode/Char.hs b/lib/Unicode/Char.hs
@@ -5,9 +5,29 @@
 -- Maintainer  : [email protected]
 -- Stability   : experimental
 --
+-- This module provides APIs to access the Unicode character database (UCD)
+-- corresponding to [Unicode Standard version
+-- 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/).
+--
+-- This module re-exports several sub-modules under it.  The sub-module
+-- structure under @Unicode.Char@ is largely based on the
+-- ["Property Index by Scope of Use" in Unicode® Standard Annex #44](https://www.unicode.org/reports/tr44/#Property_Index_Table).
+--
+-- The @Unicode.Char.*@ modules in turn depend on @Unicode.Internal.Char.*@
+-- modules which are programmatically generated from the Unicode standard's
+-- Unicode character database files. The module structure under
+-- @Unicode.Internal.Char@ is largely based on the UCD text file names from
+-- which the properties are generated.
+--
+-- For the original UCD files used in this code please refer to the @UCD@
+-- section on the Unicode standard page.  See
+-- https://www.unicode.org/reports/tr44/ to understand the contents and the
+-- format of the unicode database files.
+--
+
 module Unicode.Char
-    ( module Unicode.Char.Case
-    , module Unicode.Char.General
+    ( module Unicode.Char.General
+    , module Unicode.Char.Case
     , module Unicode.Char.Normalization
     )
 where

diff --git a/lib/Unicode/Char/Case.hs b/lib/Unicode/Char/Case.hs
@@ -5,6 +5,8 @@
 -- Maintainer  : [email protected]
 -- Stability   : experimental
 --
+-- Case and case mapping related functions.
+--
 module Unicode.Char.Case
     ( isLower
     , isUpper
@@ -15,7 +17,7 @@ import qualified Unicode.Internal.Char.DerivedCoreProperties as P
 
 -- | Returns 'True' for lower-case letters.
 --
--- prop> isLower == Data.Char.isLower
+-- prop> isLower c == Data.Char.isLower c
 --
 {-# INLINE isLower #-}
 isLower :: Char -> Bool
@@ -24,7 +26,7 @@ isLower = P.isLowercase
 -- | Returns 'True' for upper-case or title-case letters.  Title case is used by
 -- a small number of letter ligatures like the single-character form of /Lj/.
 --
--- prop> isUpper == Data.Char.isUpper
+-- prop> isUpper c == Data.Char.isUpper c
 --
 {-# INLINE isUpper #-}
 isUpper :: Char -> Bool

diff --git a/lib/Unicode/Char/General.hs b/lib/Unicode/Char/General.hs
@@ -5,32 +5,71 @@
 -- Maintainer  : [email protected]
 -- Stability   : experimental
 --
+-- General character property related functions.
+--
 module Unicode.Char.General
-    ( isLetter
+    (
+    -- * Character Properties
+      isLetter
     , isSpace
 
-    -- Hangul
-    , hangulFirst
-    , hangulLast
-    , isHangul
-    , isHangulLV
-
+    -- * Korean Hangul Characters
+    -- | The Hangul script used in the Korean writing system consists of
+    -- individual consonant and vowel letters (jamo) that are visually combined
+    -- into square display cells to form entire syllable blocks.  Hangul
+    -- syllables may be encoded directly as precomposed combinations of
+    -- individual jamo or as decomposed sequences of conjoining jamo. Modern
+    -- Hangul syllable blocks can be expressed with either two or three jamo,
+    -- either in the form consonant + vowel or in the form consonant +
+    -- vowel + consonant. The leading consonant is represented as L, the vowel
+    -- as V and the trailing consonant as T.
+    --
+    -- The Unicode Standard contains both a large set of precomposed modern
+    -- Hangul syllables and a set of conjoining Hangul jamo, which can be used
+    -- to encode archaic Korean syllable blocks as well as modern Korean
+    -- syllable blocks.
+    --
+    -- Hangul characters can be composed or decomposed algorithmically instead
+    -- of via mappings.  These APIs are used mainly for Unicode normalization
+    -- of Hangul text.
+    --
+    -- Please refer to the following resources for more information:
+    --
+    -- * The @Hangul@ section of the @East Asia@ chapter of the [Unicode Standard](https://www.unicode.org/versions/latest)
+    -- * Conformance chapter of the [Unicode Standard](https://www.unicode.org/versions/latest)
+    -- * [Unicode® Standard Annex #15 - Unicode Normalization Forms](https://www.unicode.org/reports/tr15)
+    -- * UCD file @HangulSyllableType.txt@
+    -- * https://en.wikipedia.org/wiki/Hangul_Jamo_(Unicode_block)
+    -- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo
+
+    -- ** Conjoining Jamo
+    -- | Jamo L, V and T letters.
     , isJamo
+    , jamoNCount
+
+    -- *** Jamo Leading (L)
     , jamoLFirst
     , jamoLIndex
     , jamoLLast
 
+    -- *** Jamo Vowel (V)
     , jamoVFirst
     , jamoVCount
     , jamoVIndex
     , jamoVLast
 
+    -- *** Jamo Trailing (T)
     , jamoTFirst
     , jamoTCount
     , jamoTIndex
-    , jamoLast
+    , jamoTLast
 
-    , jamoNCount
+    -- ** Hangul Syllables
+    -- | Precomposed Hangul syllables.
+    , hangulFirst
+    , hangulLast
+    , isHangul
+    , isHangulLV
     )
 where
 
@@ -45,7 +84,7 @@ import qualified Unicode.Internal.Char.PropList as P
 -- and title-case letters, plus letters of caseless scripts and modifiers
 -- letters).
 --
--- prop> isLetter == Data.Char.isLetter
+-- prop> isLetter c == Data.Char.isLetter c
 --
 {-# INLINE isLetter #-}
 isLetter :: Char -> Bool
@@ -54,88 +93,110 @@ isLetter = P.isAlphabetic
 -- | Returns 'True' for any whitespace characters, and the control
 -- characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.
 --
--- prop> isSpace == Data.Char.isSpace
+-- prop> isSpace c == Data.Char.isSpace c
 --
 {-# INLINE isSpace #-}
 isSpace :: Char -> Bool
 isSpace = P.isWhite_Space
 
 -------------------------------------------------------------------------------
--- Hangul
+-- Korean Hangul
 -------------------------------------------------------------------------------
 
--- General utilities used by decomposition as well as composition
--- Hangul characters can be decomposed algorithmically instead of via mappings
-
--- * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
--- * https://en.wikipedia.org/wiki/List_of_Hangul_jamo
--- * https://www.unicode.org/reports/tr15/tr15-18.html#Hangul
-
--- D134   Standard Korean syllable block: A sequence of one or more L followed
--- by a sequence of  one  or  more  V  and  a  sequence  of  zero  or  more  T,
--- or any other sequence that is canonically equivalent
-
 -- jamo leading
 jamoLFirst, jamoLCount, jamoLLast :: Int
+
+-- | First leading consonant jamo.
 jamoLFirst  = 0x1100
+
+-- | Total count of leading consonant jamo.
 jamoLCount = 19
+
+-- | Last leading consonant jamo.
 jamoLLast = jamoLFirst + jamoLCount - 1
 
 -- jamo vowel
 jamoVFirst, jamoVCount, jamoVLast :: Int
+
+-- | First vowel jamo.
 jamoVFirst  = 0x1161
+
+-- | Total count of vowel jamo.
 jamoVCount = 21
+
+-- | Last vowel jamo.
 jamoVLast = jamoVFirst + jamoVCount - 1
 
 -- jamo trailing
--- jamoTFirst does not represent a valid T, it represents a missing T i.e. LV
--- without a T. See comments under jamoTIndex .
 jamoTFirst, jamoTCount :: Int
+
+-- | Base value of the trailing consonant jamo range.
+--
+-- Note that 'jamoTFirst' does not represent a valid T; it represents a missing
+-- T, i.e. an LV syllable without a T. See the comments under 'jamoTIndex'.
 jamoTFirst  = 0x11a7
+
+-- | Total count of trailing consonant jamo.
 jamoTCount = 28
 
-jamoLast :: Int
-jamoLast = jamoTFirst + jamoTCount - 1
+-- | Last trailing consonant jamo.
+jamoTLast :: Int
+jamoTLast = jamoTFirst + jamoTCount - 1
 
--- VCount * TCount
+-- | Count of precomposed Hangul syllables per leading consonant jamo.
+--
+-- @jamoNCount = jamoVCount * jamoTCount@
 jamoNCount :: Int
 jamoNCount = 588
 
 -- hangul
 hangulFirst, hangulLast :: Int
+
+-- | Codepoint of the first pre-composed Hangul character.
 hangulFirst = 0xac00
+
+-- | Codepoint of the last pre-composed Hangul character.
 hangulLast = hangulFirst + jamoLCount * jamoVCount * jamoTCount - 1
 
+-- | Determine if the given character is a precomposed Hangul syllable.
 isHangul :: Char -> Bool
 isHangul c = n >= hangulFirst && n <= hangulLast
     where n = ord c
 
+-- | Determine if the given character is a Hangul LV syllable.
 isHangulLV :: Char -> Bool
 isHangulLV c = assert (jamoTCount == 28)
     snd (quotRem28 (ord c - hangulFirst)) == 0
 
+-- | Determine whether a character is a jamo L, V or T character.
 isJamo :: Char -> Bool
-isJamo c = n >= jamoLFirst && n <= jamoLast
+isJamo c = n >= jamoLFirst && n <= jamoTLast
     where n = ord c
 
--- if it is a jamo L char return the index
+-- | Given a Unicode character, if it is a leading jamo, return its index in
+-- the list of leading jamo consonants, otherwise return 'Nothing'.
 jamoLIndex :: Char -> Maybe Int
 jamoLIndex c
   | index >= 0 && index < jamoLCount = Just index
   | otherwise = Nothing
     where index = ord c - jamoLFirst
 
+-- | Given a Unicode character, if it is a vowel jamo, return its index in the
+-- list of vowel jamo, otherwise return 'Nothing'.
 jamoVIndex :: Char -> Maybe Int
 jamoVIndex c
   | index >= 0 && index < jamoVCount = Just index
   | otherwise = Nothing
     where index = ord c - jamoVFirst
 
+-- | Given a Unicode character, if it is a trailing jamo consonant, return its
+-- index in the list of trailing jamo consonants, otherwise return 'Nothing'.
+--
 -- Note that index 0 is not a valid index for a trailing consonant. Index 0
--- means no T, only LV syllable.
--- See Unicode 9.0.0: 3.12 (Hangul Syllable Decomposition)
--- TBase is set to one less than the beginning of the range of trailing
--- consonants, which starts at U+11A8.
+-- corresponds to an LV syllable, without a T.  See "Hangul Syllable
+-- Decomposition" in the Conformance chapter of the Unicode standard for more
+-- details.
+--
 jamoTIndex :: Char -> Maybe Int
 jamoTIndex c
   | index > 0 && index < jamoTCount = Just index