Skip to content

Commit

Permalink
Added support for the generation of UAX42
Browse files Browse the repository at this point in the history
  • Loading branch information
jowilco committed Oct 14, 2024
1 parent 8b870a6 commit d612e96
Show file tree
Hide file tree
Showing 73 changed files with 9,979 additions and 1 deletion.
23 changes: 23 additions & 0 deletions uax/uax42/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generating TR42

## Step 1 - Generate property value fragments

- Run org.unicode.xml.GeneratePropertyValues to populate the UNICODETOOLS_REPO_DIR/uax/uax42/fragments/ folder.

## Step 2 - Generate TR42 index.html and index.rnc

- In UNICODETOOLS_REPO_DIR/uax/uax42/ run `mvn xml:transform`

index.html and index.rnc will be generated in UNICODETOOLS_REPO_DIR/uax/uax42/output/

## Step 3 - Validate generated UAX XML files

You'll need a [RELAX NG](https://relaxng.org/) schema validator. We'll use [jing-trang](https://github.com/relaxng/jing-trang) in this example.

1. Clone and build [jing-trang](https://github.com/relaxng/jing-trang)
2. Run the following:
```
java -jar <path to jing-trang>\build\jing.jar -c UNICODETOOLS_REPO_DIR\uax\uax42\output\index.rnc <path to UAX xml file>
```

10 changes: 10 additions & 0 deletions uax/uax42/fragments/block/block.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "blocks" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional blocks element containing one or more
     block elements. Each block has three attributes: first-cp and last-cp
     (both single-code-point) and name (free text). -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="blocks" id='schema.block'>
ucd.content &amp;=
element blocks {
element block {
attribute first-cp { single-code-point },
attribute last-cp { single-code-point },
attribute name { text } }+ }?
</ucdxml:block>
4 changes: 4 additions & 0 deletions uax/uax42/fragments/boolean/boolean.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "boolean" (RELAX NG compact syntax carried as element text).
     Defines the named pattern boolean as one of the two literal values
     "Y" or "N". -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="boolean" id='schema.boolean'>
boolean = "Y" | "N"
</ucdxml:block>
10 changes: 10 additions & 0 deletions uax/uax42/fragments/cjk-radicals/cjk-radicals.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "cjk radicals" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional cjk-radicals element containing one or
     more cjk-radical elements. Attributes of cjk-radical:
       number    : string of 1 to 3 decimal digits followed by 0 to 3 apostrophes
       radical   : single-code-point, marked optional by the trailing "?"
       ideograph : single-code-point -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="cjk radicals" id='schema.cjk-radicals'>
ucd.content &amp;=
element cjk-radicals {
element cjk-radical {
attribute number { xsd:string {pattern="[0-9]{1,3}'{0,3}"}},
attribute radical { single-code-point? },
attribute ideograph { single-code-point } }+ }?
</ucdxml:block>
9 changes: 9 additions & 0 deletions uax/uax42/fragments/datatypes/code points.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "datatype for code points" (RELAX NG compact syntax carried as element text).
     single-code-point is a string of four uppercase hex digits, optionally
     preceded by one non-zero hex digit or by "10" (so 4, 5 or 6 digits).
     The remaining definitions are whitespace-separated lists built from it:
       one-or-more-code-points  : at least one single-code-point
       zero-or-more-code-points : any number, including none
       two-code-points          : exactly two -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="datatype for code points" id='schema.datatypes'>
single-code-point = xsd:string { pattern = "(|[1-9A-F]|(10))[0-9A-F]{4}" }

one-or-more-code-points = list { single-code-point + }
zero-or-more-code-points = list { single-code-point * }
two-code-points = list { single-code-point, single-code-point }
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/datatypes/datatypes.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "datatypes declaration".
     The single content line starts with "#", so in RELAX NG compact syntax
     it is a comment and not an active datatypes declaration; the text itself
     labels the xsd binding as the default. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="datatypes declaration" id='schema.datatypes'>
# default; datatypes xsd = "http://www.w3.org/2001/XMLSchema-datatypes"
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/datatypes/jis-code-point.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment defining jis-code-point (RELAX NG compact syntax carried as element text):
     a string of exactly four uppercase hex digits.
     NOTE(review): title and id are identical to those in "code points.xml"
     in the same folder; confirm that sharing them is intended and not a
     copy/paste leftover. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="datatype for code points" id='schema.datatypes'>
jis-code-point = xsd:string { pattern = "[0-9A-F]{4}" }
</ucdxml:block>
6 changes: 6 additions & 0 deletions uax/uax42/fragments/description/description.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "description" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional description element whose content is
     plain text. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="description" id='schema.description'>
ucd.content &amp;=
element description { text }?
</ucdxml:block>
22 changes: 22 additions & 0 deletions uax/uax42/fragments/do-not-emit/do-not-emit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "do-not-emit" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional do-not-emit element containing one or
     more instead elements. Attributes of instead:
       of      : one-or-more-code-points
       use     : one-or-more-code-points
       because : exactly one of the literal values enumerated below -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="do-not-emit" id='schema.do-not-emit'>
ucd.content &amp;=
element do-not-emit {
element instead {
attribute of { one-or-more-code-points },
attribute use { one-or-more-code-points },
attribute because { "Bengali_Khanda_Ta"
| "Deprecated"
| "Discouraged"
| "Dotless_Form"
| "Hamza_Form"
| "Indic_Atomic_Consonant"
| "Indic_Consonant_Conjunct"
| "Indic_Vowel_Letter"
| "Malayalam_Chillu"
| "Precomposed_Form"
| "Precomposed_Hieroglyph"
| "Preferred_Spelling"
| "Tamil_Shrii"
} }+ }?
</ucdxml:block>
20 changes: 20 additions & 0 deletions uax/uax42/fragments/emoji-data/Emoji.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "Emoji properties" (RELAX NG compact syntax carried as element text).
     Adds six optional attributes to code-point-attributes, each typed with
     the boolean pattern: Emoji, EPres, EMod, EBase, EComp and ExtPict. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="Emoji properties" id='schema.emoji-data'>
code-point-attributes &amp;=
attribute Emoji { boolean }?

code-point-attributes &amp;=
attribute EPres { boolean }?

code-point-attributes &amp;=
attribute EMod { boolean }?

code-point-attributes &amp;=
attribute EBase { boolean }?

code-point-attributes &amp;=
attribute EComp { boolean }?

code-point-attributes &amp;=
attribute ExtPict { boolean }?
</ucdxml:block>
11 changes: 11 additions & 0 deletions uax/uax42/fragments/emoji-sources/emoji-sources.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "emoji sources" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional emoji-sources element containing one or
     more emoji-source elements. Attributes of emoji-source:
       unicode                 : one-or-more-code-points
       docomo, kddi, softbank  : jis-code-point, each marked optional by the
                                 trailing "?" inside the attribute value -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="emoji sources" id='schema.emoji-sources'>
ucd.content &amp;=
element emoji-sources {
element emoji-source {
attribute unicode { one-or-more-code-points },
attribute docomo { jis-code-point? },
attribute kddi { jis-code-point? },
attribute softbank { jis-code-point? } }+ }?
</ucdxml:block>
15 changes: 15 additions & 0 deletions uax/uax42/fragments/named-sequences/named-sequences.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "named sequences" (RELAX NG compact syntax carried as element text).
     Adds two optional elements to ucd.content: named-sequences and
     provisional-named-sequences. Both have the same shape: one or more
     named-sequence elements, each with a cps attribute
     (one-or-more-code-points) and a name attribute (free text). -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="named sequences" id='schema.named-sequences'>
ucd.content &amp;=
element named-sequences {
element named-sequence {
attribute cps { one-or-more-code-points },
attribute name { text } }+ }?

ucd.content &amp;=
element provisional-named-sequences {
element named-sequence {
attribute cps { one-or-more-code-points },
attribute name { text } }+ }?
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/namespace/namespace.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "namespace declaration" (RELAX NG compact syntax carried as element text).
     Declares the schema's default namespace, bound to the prefix ucd, as
     http://www.unicode.org/ns/2003/ucd/1.0. This is distinct from the
     ucdxml namespace used by this wrapper element itself. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="namespace declaration" id='schema.namespace'>
default namespace ucd = "http://www.unicode.org/ns/2003/ucd/1.0"
</ucdxml:block>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--Manual-->
<!-- Schema fragment "normalization corrections" (RELAX NG compact syntax carried as element text).
     Adds to ucd.content an optional normalization-corrections element
     containing one or more normalization-correction elements. Attributes:
       cp      : single-code-point
       old     : one-or-more-code-points
       new     : one-or-more-code-points
       version : free text -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="normalization corrections" id='schema.normalization-corrections'>
ucd.content &amp;=
element normalization-corrections {
element normalization-correction {
attribute cp { single-code-point },
attribute old { one-or-more-code-points },
attribute new { one-or-more-code-points },
attribute version { text } }+ }?
</ucdxml:block>
8 changes: 8 additions & 0 deletions uax/uax42/fragments/nushu/Nushu.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "Nushu data" (RELAX NG compact syntax carried as element text).
     Adds two optional attributes to code-point-attributes:
       kSrc_NushuDuben : string of decimal digits, a literal dot, then more
                         decimal digits (at least one digit on each side)
       kReading        : unconstrained xsd:string -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="Nushu data" id='schema.nushu'>
code-point-attributes &amp;=
attribute kSrc_NushuDuben { xsd:string { pattern="[0-9]+\.[0-9]+" } }?

code-point-attributes &amp;=
attribute kReading { xsd:string }?
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/properties/Bidi_C.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "Bidi_C attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional Bidi_C attribute, typed with the boolean pattern, to
     code-point-attributes. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="Bidi_C attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute Bidi_C { boolean }?
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/properties/Bidi_M.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "Bidi_M attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional Bidi_M attribute, typed with the boolean pattern, to
     code-point-attributes. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="Bidi_M attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute Bidi_M { boolean }?
</ucdxml:block>
9 changes: 9 additions & 0 deletions uax/uax42/fragments/properties/InCB.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "InCB attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional InCB attribute to code-point-attributes; its value must
     be one of "Consonant", "Extend", "Linker" or "None". -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="InCB attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute InCB { "Consonant"
| "Extend"
| "Linker"
| "None"
}?
</ucdxml:block>
21 changes: 21 additions & 0 deletions uax/uax42/fragments/properties/InPC.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "InPC attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional InPC attribute to code-point-attributes; its value must
     be exactly one of the literal values enumerated below. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="InPC attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute InPC { "Bottom"
| "Bottom_And_Left"
| "Bottom_And_Right"
| "Left"
| "Left_And_Right"
| "NA"
| "Overstruck"
| "Right"
| "Top"
| "Top_And_Bottom"
| "Top_And_Bottom_And_Left"
| "Top_And_Bottom_And_Right"
| "Top_And_Left"
| "Top_And_Left_And_Right"
| "Top_And_Right"
| "Visual_Order_Left"
}?
</ucdxml:block>
42 changes: 42 additions & 0 deletions uax/uax42/fragments/properties/InSC.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "InSC attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional InSC attribute to code-point-attributes; its value must
     be exactly one of the literal values enumerated below. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="InSC attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute InSC { "Avagraha"
| "Bindu"
| "Brahmi_Joining_Number"
| "Cantillation_Mark"
| "Consonant"
| "Consonant_Dead"
| "Consonant_Final"
| "Consonant_Head_Letter"
| "Consonant_Initial_Postfixed"
| "Consonant_Killer"
| "Consonant_Medial"
| "Consonant_Placeholder"
| "Consonant_Preceding_Repha"
| "Consonant_Prefixed"
| "Consonant_Subjoined"
| "Consonant_Succeeding_Repha"
| "Consonant_With_Stacker"
| "Gemination_Mark"
| "Invisible_Stacker"
| "Joiner"
| "Modifying_Letter"
| "Non_Joiner"
| "Nukta"
| "Number"
| "Number_Joiner"
| "Other"
| "Pure_Killer"
| "Register_Shifter"
| "Reordering_Killer"
| "Syllable_Modifier"
| "Tone_Letter"
| "Tone_Mark"
| "Virama"
| "Visarga"
| "Vowel"
| "Vowel_Dependent"
| "Vowel_Independent"
}?
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/properties/JSN.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "JSN attribute" (RELAX NG compact syntax carried as element text).
     Adds an optional JSN attribute to code-point-attributes; its value is a
     string of zero to three uppercase letters A to Z (the empty string is
     allowed by the pattern). -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="JSN attribute" id='schema.properties'>
code-point-attributes &amp;=
attribute JSN { xsd:string { pattern="[A-Z]{0,3}" } }?
</ucdxml:block>
5 changes: 5 additions & 0 deletions uax/uax42/fragments/properties/Join_C.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment titled "joining properties" (RELAX NG compact syntax carried as element text).
     Adds an optional Join_C attribute, typed with the boolean pattern, to
     code-point-attributes. -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="joining properties" id='schema.properties'>
code-point-attributes &amp;=
attribute Join_C { boolean }?
</ucdxml:block>
10 changes: 10 additions & 0 deletions uax/uax42/fragments/properties/Name_Alias.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Schema fragment "name-alias element" (RELAX NG compact syntax carried as element text).
     Adds zero or more name-alias child elements to code-point-attributes.
     Attributes of name-alias, both optional:
       alias : alphanumeric words joined by a single space, hyphen or
               underscore, or by the two-character separators " -" or "- "
       type  : one of "abbreviation", "alternate", "control", "correction"
               or "figment" -->
<ucdxml:block xmlns:ucdxml="http://unicode.org/ns/2001/ucdxml" title="name-alias element" id='schema.properties'>
code-point-attributes &amp;=
element name-alias {
attribute alias { xsd:string { pattern="[a-zA-Z0-9]+(( -|- |[\-_ ])[a-zA-Z0-9]+)*" } }?,
attribute type { "abbreviation" | "alternate"
| "control" | "correction"
| "figment"
}? } *
</ucdxml:block>
Loading

0 comments on commit d612e96

Please sign in to comment.