From dbb5dd384447070f9d97d1a0d690ecac832e225c Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Mon, 16 Dec 2024 15:41:28 -0800 Subject: [PATCH] Updates from Marcus's review comments --- uax/uax42/Readme.md => docs/ucdxml.md | 6 +- .../org/unicode/xml/AttributeResolver.java | 128 +----------------- .../unicode/xml/GeneratePropertyValues.java | 6 +- .../org/unicode/props/IndexPropertyRegex.txt | 30 ++-- .../org/unicode/uax42/fragments}/Bidi_C.xml | 0 .../org/unicode/uax42/fragments}/Bidi_M.xml | 0 .../org/unicode/uax42/fragments}/Emoji.xml | 0 .../org/unicode/uax42/fragments}/InCB.xml | 0 .../org/unicode/uax42/fragments}/InPC.xml | 0 .../org/unicode/uax42/fragments}/InSC.xml | 0 .../org/unicode/uax42/fragments}/JSN.xml | 0 .../org/unicode/uax42/fragments}/Join_C.xml | 0 .../unicode/uax42/fragments}/Name_Alias.xml | 0 .../org/unicode/uax42/fragments}/Nushu.xml | 0 .../uax42/fragments/Set_of_code_points.xml | 0 .../org/unicode/uax42/fragments}/Tangut.xml | 0 .../org/unicode/uax42/fragments}/Unihan.xml | 0 .../org/unicode/uax42/fragments}/age.xml | 1 + .../org/unicode/uax42/fragments}/bc.xml | 0 .../org/unicode/uax42/fragments}/blk.xml | 0 .../org/unicode/uax42/fragments}/block.xml | 0 .../org/unicode/uax42/fragments}/bmg.xml | 0 .../org/unicode/uax42/fragments}/boolean.xml | 0 .../unicode/uax42/fragments}/boundaries.xml | 0 .../org/unicode/uax42/fragments}/bpb.xml | 0 .../org/unicode/uax42/fragments}/bpt.xml | 0 .../unicode/uax42/fragments}/case_folding.xml | 0 .../unicode/uax42/fragments}/case_mapping.xml | 0 .../unicode/uax42/fragments}/case_other.xml | 0 .../org/unicode/uax42/fragments}/casing.xml | 0 .../org/unicode/uax42/fragments}/ccc.xml | 0 .../unicode/uax42/fragments}/cjk-radicals.xml | 0 .../org/unicode/uax42/fragments}/cjkEACC.xml | 0 .../uax42/fragments}/cjkIRG_TSource.xml | 0 .../unicode/uax42/fragments}/composition.xml | 0 .../unicode/uax42/fragments}/datatypes.xml | 0 .../uax42/fragments/datatypes_code_points.xml | 0 .../uax42/fragments}/decomposition.xml | 0 .../unicode/uax42/fragments}/description.xml | 0 .../unicode/uax42/fragments}/do-not-emit.xml | 0 .../org/unicode/uax42/fragments}/ea.xml | 0 .../uax42/fragments}/emoji-sources.xml | 0 .../uax42/fragments}/function_graphic.xml | 0 .../org/unicode/uax42/fragments}/gc.xml | 0 .../org/unicode/uax42/fragments}/groups.xml | 0 .../org/unicode/uax42/fragments}/hst.xml | 0 .../unicode/uax42/fragments}/identifier.xml | 0 .../unicode/uax42/fragments}/ideographs.xml | 0 .../org/unicode/uax42/fragments}/isc.xml | 0 .../uax42/fragments}/jis-code-point.xml | 0 .../org/unicode/uax42/fragments}/joining.xml | 3 +- .../org/unicode/uax42/fragments}/lb.xml | 0 .../uax42/fragments}/miscellaneous.xml | 0 .../org/unicode/uax42/fragments}/na.xml | 0 .../org/unicode/uax42/fragments}/na1.xml | 0 .../uax42/fragments}/named-sequences.xml | 0 .../unicode/uax42/fragments}/namespace.xml | 0 .../fragments}/normalization-corrections.xml | 0 .../org/unicode/uax42/fragments}/numeric.xml | 0 .../org/unicode/uax42/fragments}/pattern.xml | 0 .../unicode/uax42/fragments}/quickcheck.xml | 0 .../unicode/uax42/fragments}/repertoire.xml | 0 .../fragments/repertoire_Code_points.xml | 0 .../org/unicode/uax42/fragments}/script.xml | 0 .../uax42/fragments}/simple_case_mapping.xml | 0 .../fragments}/standardized-variants.xml | 0 .../org/unicode/uax42/fragments}/start.xml | 0 .../resources/org/unicode}/uax42/index.xml | 122 ++++++++--------- .../org/unicode}/uax42/index2html.xsl | 0 .../org/unicode}/uax42/index2rnc.xsl | 0 .../org/unicode}/uax42/output/index.html | 4 +- .../org/unicode}/uax42/output/index.rnc | 4 +- .../main/resources/org/unicode}/uax42/pom.xml | 4 +- 73 files changed, 95 insertions(+), 213 deletions(-) rename uax/uax42/Readme.md => docs/ucdxml.md (60%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Bidi_C.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Bidi_M.xml (100%) rename {uax/uax42/fragments/emoji-data => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Emoji.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InCB.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InPC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/InSC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/JSN.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Join_C.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Name_Alias.xml (100%) rename {uax/uax42/fragments/nushu => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Nushu.xml (100%) rename uax/uax42/fragments/repertoire/Set of code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml (100%) rename {uax/uax42/fragments/tangut => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Tangut.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/Unihan.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/age.xml (96%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bc.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/blk.xml (100%) rename {uax/uax42/fragments/block => unicodetools/src/main/resources/org/unicode/uax42/fragments}/block.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bmg.xml (100%) rename {uax/uax42/fragments/boolean => unicodetools/src/main/resources/org/unicode/uax42/fragments}/boolean.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/boundaries.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bpb.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/bpt.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_folding.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_mapping.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/case_other.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/casing.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ccc.xml (100%) rename {uax/uax42/fragments/cjk-radicals => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjk-radicals.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjkEACC.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/cjkIRG_TSource.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/composition.xml (100%) rename {uax/uax42/fragments/datatypes => unicodetools/src/main/resources/org/unicode/uax42/fragments}/datatypes.xml (100%) rename uax/uax42/fragments/datatypes/code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/decomposition.xml (100%) rename {uax/uax42/fragments/description => unicodetools/src/main/resources/org/unicode/uax42/fragments}/description.xml (100%) rename {uax/uax42/fragments/do-not-emit => unicodetools/src/main/resources/org/unicode/uax42/fragments}/do-not-emit.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ea.xml (100%) rename {uax/uax42/fragments/emoji-sources => unicodetools/src/main/resources/org/unicode/uax42/fragments}/emoji-sources.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/function_graphic.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/gc.xml (100%) rename {uax/uax42/fragments/repertoire => unicodetools/src/main/resources/org/unicode/uax42/fragments}/groups.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/hst.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/identifier.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/ideographs.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/isc.xml (100%) rename {uax/uax42/fragments/datatypes => unicodetools/src/main/resources/org/unicode/uax42/fragments}/jis-code-point.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/joining.xml (96%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/lb.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/miscellaneous.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/na.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/na1.xml (100%) rename {uax/uax42/fragments/named-sequences => unicodetools/src/main/resources/org/unicode/uax42/fragments}/named-sequences.xml (100%) rename {uax/uax42/fragments/namespace => unicodetools/src/main/resources/org/unicode/uax42/fragments}/namespace.xml (100%) rename {uax/uax42/fragments/normalization-corrections => unicodetools/src/main/resources/org/unicode/uax42/fragments}/normalization-corrections.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/numeric.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/pattern.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/quickcheck.xml (100%) rename {uax/uax42/fragments/repertoire => unicodetools/src/main/resources/org/unicode/uax42/fragments}/repertoire.xml (100%) rename uax/uax42/fragments/repertoire/Code points.xml => unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/script.xml (100%) rename {uax/uax42/fragments/properties => unicodetools/src/main/resources/org/unicode/uax42/fragments}/simple_case_mapping.xml (100%) rename {uax/uax42/fragments/standardized-variants => unicodetools/src/main/resources/org/unicode/uax42/fragments}/standardized-variants.xml (100%) rename {uax/uax42/fragments/start => unicodetools/src/main/resources/org/unicode/uax42/fragments}/start.xml (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index.xml (94%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index2html.xsl (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/index2rnc.xsl (100%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/output/index.html (99%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/output/index.rnc (99%) rename {uax => unicodetools/src/main/resources/org/unicode}/uax42/pom.xml (94%) diff --git a/uax/uax42/Readme.md b/docs/ucdxml.md similarity index 60% rename from uax/uax42/Readme.md rename to docs/ucdxml.md index f2533a2a2..207842db2 100644 --- a/uax/uax42/Readme.md +++ b/docs/ucdxml.md @@ -2,13 +2,11 @@ ## Step 1 - Generate property value fragments -- Run org.unicode.xml.GeneratePropertyValues to populate the UNICODETOOLS_REPO_DIR/uax/uax42/fragments/ folder. +- mvn compile exec:java '-Dexec.mainClass="org.unicode.xml.GeneratePropertyValues"' '-Dexec.args="--ucdversion 16.0.0 -f $(cd ./unicodetools/src/main/resources/org/unicode/uax42/fragments; pwd)"' -DCLDR_DIR=$(cd ../cldr ; pwd) -DUNICODETOOLS_GEN_DIR=$(cd ../Generated ; pwd) -DUNICODETOOLS_REPO_DIR=$(pwd) ## Step 2 - Generate TR42 index.html and index.rnc -- In UNICODETOOLS_REPO_DIR/uax/uax42/ run `mvn xml:transform` - - index.html and index.rnc will be generated in UNICODETOOLS_REPO_DIR/uax/uax42/output/ +- mvn xml:transform -f $(cd ./unicodetools/src/main/resources/org/unicode/uax42/fragments; pwd) -Doutputdir=../Generated/uax42/ ## Step 3 - Validate generated UAX XML files diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java index 87db09582..393bb3281 100644 --- a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -10,46 +10,11 @@ public class AttributeResolver { private final IndexUnicodeProperties indexUnicodeProperties; private final UnicodeMap map_age; - private final UnicodeMap map_bidi_class; - private final UnicodeMap - map_bidi_paired_bracket_type; private final UnicodeMap map_block; - private final UnicodeMap - map_canonical_combining_class; private final UnicodeMap map_decomposition_type; - private final UnicodeMap map_do_not_emit_type; - private final UnicodeMap map_east_asian_width; private final UnicodeMap map_general_category; - private final UnicodeMap - map_grapheme_cluster_break; - private final UnicodeMap - map_hangul_syllable_type; - private final UnicodeMap map_identifier_status; - private final UnicodeMap map_identifier_type; - private final UnicodeMap map_idn_2008; - private final UnicodeMap map_idn_status; - private final UnicodeMap - map_indic_conjunct_break; - private final UnicodeMap - map_indic_positional_category; - private final UnicodeMap - map_indic_syllabic_category; - private final UnicodeMap map_jamo_short_name; - private final UnicodeMap map_joining_group; - private final UnicodeMap map_joining_type; - private final UnicodeMap map_line_break; - private final UnicodeMap map_nfc_quick_check; - private final UnicodeMap map_nfd_quick_check; - private final UnicodeMap map_nfkc_quick_check; - private final UnicodeMap map_nfkd_quick_check; - private final UnicodeMap map_numeric_type; - private final UnicodeMap map_other_joining_type; private final UnicodeMap map_script; private final UnicodeMap map_script_extensions; - private final UnicodeMap map_sentence_break; - private final UnicodeMap - map_vertical_orientation; - private final UnicodeMap map_word_break; private final HashMap> map_NameAlias; // If there is a change in any of these properties between two adjacent characters, it will @@ -67,47 +32,12 @@ public class AttributeResolver { public AttributeResolver(IndexUnicodeProperties iup) { indexUnicodeProperties = iup; map_age = indexUnicodeProperties.loadEnum(UcdProperty.Age); - map_bidi_class = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Class); - map_bidi_paired_bracket_type = - indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); map_block = indexUnicodeProperties.loadEnum(UcdProperty.Block); - map_canonical_combining_class = - indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); map_decomposition_type = indexUnicodeProperties.loadEnum(UcdProperty.Decomposition_Type); - map_do_not_emit_type = indexUnicodeProperties.loadEnum(UcdProperty.Do_Not_Emit_Type); - map_east_asian_width = indexUnicodeProperties.loadEnum(UcdProperty.East_Asian_Width); map_general_category = indexUnicodeProperties.loadEnum(UcdProperty.General_Category); - map_grapheme_cluster_break = - indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); - map_hangul_syllable_type = - indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); - map_identifier_status = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Status); - map_identifier_type = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Type); - map_idn_2008 = indexUnicodeProperties.loadEnum(UcdProperty.Idn_2008); - map_idn_status = indexUnicodeProperties.loadEnum(UcdProperty.Idn_Status); - map_indic_conjunct_break = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); - map_indic_positional_category = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); - map_indic_syllabic_category = - indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); - map_jamo_short_name = indexUnicodeProperties.loadEnum(UcdProperty.Jamo_Short_Name); - map_joining_group = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Group); - map_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Type); - map_line_break = indexUnicodeProperties.loadEnum(UcdProperty.Line_Break); - map_nfc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFC_Quick_Check); - map_nfd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFD_Quick_Check); - map_nfkc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKC_Quick_Check); - map_nfkd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKD_Quick_Check); - map_numeric_type = indexUnicodeProperties.loadEnum(UcdProperty.Numeric_Type); - map_other_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Other_Joining_Type); map_script = indexUnicodeProperties.loadEnum(UcdProperty.Script); map_script_extensions = indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); - map_sentence_break = indexUnicodeProperties.loadEnum(UcdProperty.Sentence_Break); - map_vertical_orientation = - indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); - map_word_break = indexUnicodeProperties.loadEnum(UcdProperty.Word_Break); // UCD code is only set up to read a single Alias value from NameAliases.txt // Instead, we'll load the Alias and the Type data as part of the constructor. We'll keep in @@ -295,12 +225,6 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { } case Enumerated: switch (prop) { - case Bidi_Class: - return map_bidi_class.get(codepoint).getShortName(); - case Bidi_Paired_Bracket_Type: - return map_bidi_paired_bracket_type.get(codepoint).getShortName(); - case Canonical_Combining_Class: - return map_canonical_combining_class.get(codepoint).getShortName(); case Decomposition_Type: // Returning lower case to maintain compatibility with older generated // files. @@ -308,56 +232,10 @@ public String getAttributeValue(UcdProperty prop, int codepoint) { .get(codepoint) .getShortName() .toLowerCase(Locale.ROOT); - case Do_Not_Emit_Type: - return map_do_not_emit_type.get(codepoint).getShortName(); - case East_Asian_Width: - return map_east_asian_width.get(codepoint).getShortName(); - case General_Category: - return map_general_category.get(codepoint).getShortName(); - case Grapheme_Cluster_Break: - return map_grapheme_cluster_break.get(codepoint).getShortName(); - case Hangul_Syllable_Type: - return map_hangul_syllable_type.get(codepoint).getShortName(); - case Identifier_Status: - return map_identifier_status.get(codepoint).getShortName(); - case Identifier_Type: - return map_identifier_type.get(codepoint).getShortName(); - case Idn_2008: - return map_idn_2008.get(codepoint).getShortName(); - case Idn_Status: - return map_idn_status.get(codepoint).getShortName(); - case Indic_Conjunct_Break: - return map_indic_conjunct_break.get(codepoint).getShortName(); - case Indic_Positional_Category: - return map_indic_positional_category.get(codepoint).getShortName(); - case Indic_Syllabic_Category: - return map_indic_syllabic_category.get(codepoint).getShortName(); - case Joining_Group: - return map_joining_group.get(codepoint).getShortName(); - case Joining_Type: - return map_joining_type.get(codepoint).getShortName(); - case Line_Break: - return map_line_break.get(codepoint).getShortName(); - case NFC_Quick_Check: - return map_nfc_quick_check.get(codepoint).getShortName(); - case NFD_Quick_Check: - return map_nfd_quick_check.get(codepoint).getShortName(); - case NFKC_Quick_Check: - return map_nfkc_quick_check.get(codepoint).getShortName(); - case NFKD_Quick_Check: - return map_nfkd_quick_check.get(codepoint).getShortName(); - case Numeric_Type: - return map_numeric_type.get(codepoint).getShortName(); - case Other_Joining_Type: - return map_other_joining_type.get(codepoint).getShortName(); - case Sentence_Break: - return map_sentence_break.get(codepoint).getShortName(); - case Vertical_Orientation: - return map_vertical_orientation.get(codepoint).getShortName(); - case Word_Break: - return map_word_break.get(codepoint).getShortName(); default: - throw new RuntimeException("Missing Enumerated case"); + final UnicodeProperty property = indexUnicodeProperties.getProperty(prop); + final List valueAliases = property.getValueAliases(property.getValue(codepoint)); + return valueAliases.get(0); } case Binary: { diff --git a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java index 83d4be093..f8a0dfa27 100644 --- a/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/xml/GeneratePropertyValues.java @@ -385,7 +385,7 @@ private static void createPropertyFragment( private static void createPropertyFragment( String filename, String title, SCHEMA schema, String formattedFragment) throws IOException { - BufferedWriter writer = getFragmentWriter(schema.getName(), filename); + BufferedWriter writer = getFragmentWriter(filename); writer.write( "" + NEWLINE @@ -403,10 +403,10 @@ private static void createPropertyFragment( writer.close(); } - private static BufferedWriter getFragmentWriter(String schema, String filename) + private static BufferedWriter getFragmentWriter(String filename) throws IOException { File fragmentFolder = - new File(destinationFolder + File.separator + schema + File.separator); + new File(destinationFolder + File.separator); if (!fragmentFolder.exists()) { if (!fragmentFolder.mkdir()) { throw new IOException(); diff --git a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt index ac6499419..70c52767f 100644 --- a/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt +++ b/unicodetools/src/main/resources/org/unicode/props/IndexPropertyRegex.txt @@ -197,23 +197,23 @@ NC_Version ; SINGLE_VALUED ; [0-9]\.[0-9]\.[0-9] # All not listed are SINGLE_VALUED ; null # ============================= -Script_Extensions ; MULTI_VALUED ; -Standardized_Variant ; MULTI_VALUED ; .* +Script_Extensions ; MULTI_VALUED ; +Standardized_Variant ; MULTI_VALUED ; .* -Idn_Status ; SINGLE_VALUED ; -Idn_Mapping ; SINGLE_VALUED ; $codePoints -Idn_2008 ; SINGLE_VALUED ; +Idn_Status ; SINGLE_VALUED ; +Idn_Mapping ; SINGLE_VALUED ; $codePoints +Idn_2008 ; SINGLE_VALUED ; -Identifier_Status ; SINGLE_VALUED ; -Identifier_Type ; MULTI_VALUED ; +Identifier_Status ; SINGLE_VALUED ; +Identifier_Type ; MULTI_VALUED ; -Confusable_SL ; SINGLE_VALUED ; $codePoints -Confusable_SA ; SINGLE_VALUED ; $codePoints -Confusable_ML ; SINGLE_VALUED ; $codePoints -Confusable_MA ; SINGLE_VALUED ; $codePoints +Confusable_SL ; SINGLE_VALUED ; $codePoints +Confusable_SA ; SINGLE_VALUED ; $codePoints +Confusable_ML ; SINGLE_VALUED ; $codePoints +Confusable_MA ; SINGLE_VALUED ; $codePoints -#Emoji ; SINGLE_VALUED ; -#Emoji_Presentation ; SINGLE_VALUED ; -#Emoji_Modifier ; SINGLE_VALUED ; -#Emoji_Modifier_Base ; SINGLE_VALUED ; +#Emoji ; SINGLE_VALUED ; +#Emoji_Presentation ; SINGLE_VALUED ; +#Emoji_Modifier ; SINGLE_VALUED ; +#Emoji_Modifier_Base ; SINGLE_VALUED ; diff --git a/uax/uax42/fragments/properties/Bidi_C.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_C.xml similarity index 100% rename from uax/uax42/fragments/properties/Bidi_C.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_C.xml diff --git a/uax/uax42/fragments/properties/Bidi_M.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_M.xml similarity index 100% rename from uax/uax42/fragments/properties/Bidi_M.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Bidi_M.xml diff --git a/uax/uax42/fragments/emoji-data/Emoji.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Emoji.xml similarity index 100% rename from uax/uax42/fragments/emoji-data/Emoji.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Emoji.xml diff --git a/uax/uax42/fragments/properties/InCB.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InCB.xml similarity index 100% rename from uax/uax42/fragments/properties/InCB.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InCB.xml diff --git a/uax/uax42/fragments/properties/InPC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InPC.xml similarity index 100% rename from uax/uax42/fragments/properties/InPC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InPC.xml diff --git a/uax/uax42/fragments/properties/InSC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/InSC.xml similarity index 100% rename from uax/uax42/fragments/properties/InSC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/InSC.xml diff --git a/uax/uax42/fragments/properties/JSN.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/JSN.xml similarity index 100% rename from uax/uax42/fragments/properties/JSN.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/JSN.xml diff --git a/uax/uax42/fragments/properties/Join_C.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Join_C.xml similarity index 100% rename from uax/uax42/fragments/properties/Join_C.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Join_C.xml diff --git a/uax/uax42/fragments/properties/Name_Alias.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Name_Alias.xml similarity index 100% rename from uax/uax42/fragments/properties/Name_Alias.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Name_Alias.xml diff --git a/uax/uax42/fragments/nushu/Nushu.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Nushu.xml similarity index 100% rename from uax/uax42/fragments/nushu/Nushu.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Nushu.xml diff --git a/uax/uax42/fragments/repertoire/Set of code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml similarity index 100% rename from uax/uax42/fragments/repertoire/Set of code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Set_of_code_points.xml diff --git a/uax/uax42/fragments/tangut/Tangut.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Tangut.xml similarity index 100% rename from uax/uax42/fragments/tangut/Tangut.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Tangut.xml diff --git a/uax/uax42/fragments/properties/Unihan.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/Unihan.xml similarity index 100% rename from uax/uax42/fragments/properties/Unihan.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/Unihan.xml diff --git a/uax/uax42/fragments/properties/age.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml similarity index 96% rename from uax/uax42/fragments/properties/age.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml index c15963146..8a1722f22 100644 --- a/uax/uax42/fragments/properties/age.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/fragments/age.xml @@ -17,6 +17,7 @@ | "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }? \ No newline at end of file diff --git a/uax/uax42/fragments/properties/bc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bc.xml similarity index 100% rename from uax/uax42/fragments/properties/bc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bc.xml diff --git a/uax/uax42/fragments/properties/blk.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/blk.xml similarity index 100% rename from uax/uax42/fragments/properties/blk.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/blk.xml diff --git a/uax/uax42/fragments/block/block.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/block.xml similarity index 100% rename from uax/uax42/fragments/block/block.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/block.xml diff --git a/uax/uax42/fragments/properties/bmg.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bmg.xml similarity index 100% rename from uax/uax42/fragments/properties/bmg.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bmg.xml diff --git a/uax/uax42/fragments/boolean/boolean.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/boolean.xml similarity index 100% rename from uax/uax42/fragments/boolean/boolean.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/boolean.xml diff --git a/uax/uax42/fragments/properties/boundaries.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/boundaries.xml similarity index 100% rename from uax/uax42/fragments/properties/boundaries.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/boundaries.xml diff --git a/uax/uax42/fragments/properties/bpb.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bpb.xml similarity index 100% rename from uax/uax42/fragments/properties/bpb.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bpb.xml diff --git a/uax/uax42/fragments/properties/bpt.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/bpt.xml similarity index 100% rename from uax/uax42/fragments/properties/bpt.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/bpt.xml diff --git a/uax/uax42/fragments/properties/case_folding.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_folding.xml similarity index 100% rename from uax/uax42/fragments/properties/case_folding.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_folding.xml diff --git a/uax/uax42/fragments/properties/case_mapping.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_mapping.xml similarity index 100% rename from uax/uax42/fragments/properties/case_mapping.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_mapping.xml diff --git a/uax/uax42/fragments/properties/case_other.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/case_other.xml similarity index 100% rename from uax/uax42/fragments/properties/case_other.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/case_other.xml diff --git a/uax/uax42/fragments/properties/casing.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/casing.xml similarity index 100% rename from uax/uax42/fragments/properties/casing.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/casing.xml diff --git a/uax/uax42/fragments/properties/ccc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ccc.xml similarity index 100% rename from uax/uax42/fragments/properties/ccc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ccc.xml diff --git a/uax/uax42/fragments/cjk-radicals/cjk-radicals.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjk-radicals.xml similarity index 100% rename from uax/uax42/fragments/cjk-radicals/cjk-radicals.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/cjk-radicals.xml diff --git a/uax/uax42/fragments/properties/cjkEACC.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkEACC.xml similarity index 100% rename from uax/uax42/fragments/properties/cjkEACC.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkEACC.xml diff --git a/uax/uax42/fragments/properties/cjkIRG_TSource.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkIRG_TSource.xml similarity index 100% rename from uax/uax42/fragments/properties/cjkIRG_TSource.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/cjkIRG_TSource.xml diff --git a/uax/uax42/fragments/properties/composition.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/composition.xml similarity index 100% rename from uax/uax42/fragments/properties/composition.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/composition.xml diff --git a/uax/uax42/fragments/datatypes/datatypes.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes.xml similarity index 100% rename from uax/uax42/fragments/datatypes/datatypes.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes.xml diff --git a/uax/uax42/fragments/datatypes/code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml similarity index 100% rename from uax/uax42/fragments/datatypes/code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/datatypes_code_points.xml diff --git a/uax/uax42/fragments/properties/decomposition.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/decomposition.xml similarity index 100% rename from uax/uax42/fragments/properties/decomposition.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/decomposition.xml diff --git a/uax/uax42/fragments/description/description.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/description.xml similarity index 100% rename from uax/uax42/fragments/description/description.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/description.xml diff --git a/uax/uax42/fragments/do-not-emit/do-not-emit.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/do-not-emit.xml similarity index 100% rename from uax/uax42/fragments/do-not-emit/do-not-emit.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/do-not-emit.xml diff --git a/uax/uax42/fragments/properties/ea.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ea.xml similarity index 100% rename from uax/uax42/fragments/properties/ea.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ea.xml diff --git a/uax/uax42/fragments/emoji-sources/emoji-sources.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/emoji-sources.xml similarity index 100% rename from uax/uax42/fragments/emoji-sources/emoji-sources.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/emoji-sources.xml diff --git a/uax/uax42/fragments/properties/function_graphic.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/function_graphic.xml similarity index 100% rename from uax/uax42/fragments/properties/function_graphic.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/function_graphic.xml diff --git a/uax/uax42/fragments/properties/gc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/gc.xml similarity index 100% rename from uax/uax42/fragments/properties/gc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/gc.xml diff --git a/uax/uax42/fragments/repertoire/groups.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/groups.xml similarity index 100% rename from uax/uax42/fragments/repertoire/groups.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/groups.xml diff --git a/uax/uax42/fragments/properties/hst.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/hst.xml similarity index 100% rename from uax/uax42/fragments/properties/hst.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/hst.xml diff --git a/uax/uax42/fragments/properties/identifier.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/identifier.xml similarity index 100% rename from uax/uax42/fragments/properties/identifier.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/identifier.xml diff --git a/uax/uax42/fragments/properties/ideographs.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/ideographs.xml similarity index 100% rename from uax/uax42/fragments/properties/ideographs.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/ideographs.xml diff --git a/uax/uax42/fragments/properties/isc.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/isc.xml similarity index 100% rename from uax/uax42/fragments/properties/isc.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/isc.xml diff --git a/uax/uax42/fragments/datatypes/jis-code-point.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/jis-code-point.xml similarity index 100% rename from uax/uax42/fragments/datatypes/jis-code-point.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/jis-code-point.xml diff --git a/uax/uax42/fragments/properties/joining.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml similarity index 96% rename from uax/uax42/fragments/properties/joining.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml index ba6684a27..184fcca14 100644 --- a/uax/uax42/fragments/properties/joining.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/fragments/joining.xml @@ -14,7 +14,8 @@ | "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/fragments/properties/lb.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/lb.xml similarity index 100% rename from uax/uax42/fragments/properties/lb.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/lb.xml diff --git a/uax/uax42/fragments/properties/miscellaneous.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/miscellaneous.xml similarity index 100% rename from uax/uax42/fragments/properties/miscellaneous.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/miscellaneous.xml diff --git a/uax/uax42/fragments/properties/na.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/na.xml similarity index 100% rename from uax/uax42/fragments/properties/na.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/na.xml diff --git a/uax/uax42/fragments/properties/na1.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/na1.xml similarity index 100% rename from uax/uax42/fragments/properties/na1.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/na1.xml diff --git a/uax/uax42/fragments/named-sequences/named-sequences.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/named-sequences.xml similarity index 100% rename from uax/uax42/fragments/named-sequences/named-sequences.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/named-sequences.xml diff --git a/uax/uax42/fragments/namespace/namespace.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/namespace.xml similarity index 100% rename from uax/uax42/fragments/namespace/namespace.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/namespace.xml diff --git a/uax/uax42/fragments/normalization-corrections/normalization-corrections.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/normalization-corrections.xml similarity index 100% rename from uax/uax42/fragments/normalization-corrections/normalization-corrections.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/normalization-corrections.xml diff --git a/uax/uax42/fragments/properties/numeric.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/numeric.xml similarity index 100% rename from uax/uax42/fragments/properties/numeric.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/numeric.xml diff --git a/uax/uax42/fragments/properties/pattern.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/pattern.xml similarity index 100% rename from uax/uax42/fragments/properties/pattern.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/pattern.xml diff --git a/uax/uax42/fragments/properties/quickcheck.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/quickcheck.xml similarity index 100% rename from uax/uax42/fragments/properties/quickcheck.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/quickcheck.xml diff --git a/uax/uax42/fragments/repertoire/repertoire.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire.xml similarity index 100% rename from uax/uax42/fragments/repertoire/repertoire.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire.xml diff --git a/uax/uax42/fragments/repertoire/Code points.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml similarity index 100% rename from uax/uax42/fragments/repertoire/Code points.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/repertoire_Code_points.xml diff --git a/uax/uax42/fragments/properties/script.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/script.xml similarity index 100% rename from uax/uax42/fragments/properties/script.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/script.xml diff --git a/uax/uax42/fragments/properties/simple_case_mapping.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/simple_case_mapping.xml similarity index 100% rename from uax/uax42/fragments/properties/simple_case_mapping.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/simple_case_mapping.xml diff --git a/uax/uax42/fragments/standardized-variants/standardized-variants.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/standardized-variants.xml similarity index 100% rename from uax/uax42/fragments/standardized-variants/standardized-variants.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/standardized-variants.xml diff --git a/uax/uax42/fragments/start/start.xml b/unicodetools/src/main/resources/org/unicode/uax42/fragments/start.xml similarity index 100% rename from uax/uax42/fragments/start/start.xml rename to unicodetools/src/main/resources/org/unicode/uax42/fragments/start.xml diff --git a/uax/uax42/index.xml b/unicodetools/src/main/resources/org/unicode/uax42/index.xml similarity index 94% rename from uax/uax42/index.xml rename to unicodetools/src/main/resources/org/unicode/uax42/index.xml index 1ea2f0f65..6b4733a2b 100644 --- a/uax/uax42/index.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/index.xml @@ -650,7 +650,7 @@ The namespace for our elements is “http://www.unicode.org/ns/2003/ucd/1.0”. Our attributes are in the empty namespace. - + In all our examples, we assume that this namespace is the default one. @@ -659,7 +659,7 @@
Datatypes We use a standard XML Schema datatypes: - + Characters are pervasive in the UCD, and will need to be represented. Representing characters directly by themselves would seem the most obvious choice; for example, we could express that the decomposition of U+00E8 is “è”, that is have exactly two characters in (the @@ -672,7 +672,7 @@ decomposition of U+00E8 will be represented by the nine characters “0065 0300” in the infoset. - +
@@ -680,7 +680,7 @@ Root Element The root element of valid documents is a ucd. - + @@ -689,7 +689,7 @@ A large number of properties are boolean. We uniformly use the values Y and N for those: - + @@ -713,7 +713,7 @@ url='https://www.unicode.org/reports/tr41/tr41-34.html#Versions'>[Versions]; and conversely, that documents which do not purport to represent the UCD be described as such. - + @@ -722,7 +722,7 @@ The repertoire child element of the ucd element describes the code points and their properties. As we will see shortly, code points can be described individually or as part of a group: - +
@@ -733,7 +733,7 @@ .. U+9FA5) where all the code points have the same property values if we ignore their name and their Unihan properties. - + This observation suggests that it is profitable to represent sets of code points which share the same properties, rather than individual code points. To make the representation of the sets simple, we restrict them to be segments in the code point space, that is a set is defined by the first and @@ -759,7 +759,7 @@ This leads to four elements to describe sets of code points: - +
@@ -811,7 +811,7 @@ easier: either a property is defined by the element for a code point, or it is defined by the immediately enclosing group element. - + @@ -845,7 +845,7 @@ The age attribute captures the version of Unicode in which a code point was assigned to an abstract character, or made a surrogate or non-character. - + @@ -855,8 +855,8 @@ (na), and possibly the name this character had in version 1.0 of the standard (na1). - - + + The majority of the characters in Unicode have a name which is of the form CJK UNIFIED IDEOGRAPH-<code point>. It also happens that character names cannot contain the character U+0023 # NUMBER SIGN, so we adopted the following convention: if a @@ -890,7 +890,7 @@ The Name_Alias property is represented by zero or more name-alias child elements: - + @@ -898,7 +898,7 @@ Block property The Block property is represented by the blk attribute: - + @@ -906,7 +906,7 @@ General Category The general category is represented by the gc attribute. - + @@ -918,7 +918,7 @@ Because the set of values that this property has taken across the various versions of the UCD is rather large, our schema does not restrict the possible values to those actually used. - + @@ -926,15 +926,15 @@ Bidirectionality properties The bidirectional class is represented by the bc attribute. - + The mirrored property is represented by the Bidi_M attribute, which takes a boolean value. - + The bmg attribute is the code point of a character whose glyph is typically a mirrored image of the glyph for the current character. - + Note that we do not express the “Best Fit” element recorded in BidiMirroring.txt. For one thing, it is not meant to be machine readable. More importantly, the idea underlying the mirrored glyph is delicate to use, since it makes assumptions about the design of the fonts, and @@ -942,12 +942,12 @@ The Bidi_Control property is represented by the Bidi_C attribute. - + The bidi paired bracket type and bidi paired bracket properties are represented by the bpt and bpb attributes respectively. - - + + @@ -961,17 +961,17 @@ decomposition mapping is the character itself, we use the attribute value # (U+0023 # NUMBER SIGN) as a shorthand notation; this enables those attributes to be captured in groups. - + The properties Composition_Exclusion and Full_Composition_Exclusion are represented by the attributes CE and Comp_Ex: - + The properties NFC_Quick_Check, NFD_Quick_Check, NFKC_Quick_Check, NFKD_Quick_Check, Expands_On_NFC, Expands_On_NFD, Expands_On_NFKC, Expands_On_NKFD, FC_NFKC_Closure have corresponding attributes. - + @@ -982,7 +982,7 @@ The numeric value is represented by the nv attribute, represented as a whole number or a fraction. - + @@ -992,10 +992,10 @@ The jg attribute is the joining group of the character. - + The Join_Control property is represented by the Join_C attribute. - + @@ -1003,7 +1003,7 @@ Linebreak properties The Line_Break property is represented by the lb attribute. - + @@ -1011,7 +1011,7 @@ East Asian Width property The East Asian width property is represented by the ea attribute. - + @@ -1020,7 +1020,7 @@ The Uppercase, Lowercase, Other_Uppercase and Other_Lowercase properties are represented by corresponding attributes. - + Most characters have a case mapping and case folding properties that simply map or fold to themselves. This is very similar to the situation we encountered with names, and we adopted a similar convention: if the value of a case mapping or case folding property is the character @@ -1030,22 +1030,22 @@ The simple case mappings are recorded in the suc, slc, stc attributes. - + The non-simple casing are recorded in the uc, lc and tc attributes. - + The Simple_Case_Folding and Case_Folding properties are recorded in the scf and cf attributes respectively. - + The Case_Ignorable, Cased, Changes_When_Casefolded, Changes_When_Casemapped, Changes_When_Lowercased, Changes_When_NFKC_Casefolded, Changes_When_Titlecased, Changes_When_Uppercased, NFKC_Casefold, and NFKC_Simple_Casefold properties are recorded in these attributes: - + Note that the UCD records more information about case folding than is expressed in the properties, specifically the entries in CaseFolding.txt with status T. @@ -1057,7 +1057,7 @@ The script and script extension properties are represented by the sc and scx attributes respectively. - + @@ -1065,7 +1065,7 @@ ISO Comment properties The ISO 10646 comment field is represented by the isc attribute. - + @@ -1073,10 +1073,10 @@ Hangul properties The property Hangul_Syllable_Type is represented by the hst attribute. - + The property Jamo_Short_Name is represented by the JSN attribute: - + @@ -1085,14 +1085,14 @@ The property Indic_Syllabic_Category is represented by the InSC attribute. - + The property Indic_Positional_Category is represented by the InPC attribute: - + The property Indic_Conjunct_Break is represented by the InCB attribute: - + @@ -1104,11 +1104,11 @@ ID_Compat_Math_Start, and ID_Compat_Math_Continue are represented by corresponding attributes: - + The properties Pattern_Syntax and Pattern_White_Space are represented by corresponding attributes: - + @@ -1125,7 +1125,7 @@ describe the function or graphic characteristic of a character, and have each a corresponding attribute. - + @@ -1136,7 +1136,7 @@ Grapheme_Cluster_Break, Word_Break, and Sentence_Break each have a corresponding attribute: - + @@ -1147,7 +1147,7 @@ IDS_Trinary_Operator, IDS_Unary_Operator, and Radical have corresponding attributes: - + @@ -1156,7 +1156,7 @@ The properties Deprecated, Variation_Selector, and Noncharacter_Code_Point have corresponding attributes: - + @@ -1164,7 +1164,7 @@ Unihan properties The Unihan properties (from the Unihan database) are represented as attributes. - + @@ -1174,7 +1174,7 @@ represents the radical stroke index. The attribute kTGT_MergedSrc indicates the source reference for the character. - + @@ -1183,7 +1183,7 @@ The Nushu data are represented as attributes. The attribute kSrc_NushuDuben indicates the page number and order of the item from the NushuDuben reference source. Nushu common reading is represented as kReading. - + @@ -1192,7 +1192,7 @@ The properties Emoji, EPres, EMod, EBase, EComp, and ExtPict have corresponding attributes: - + @@ -1203,7 +1203,7 @@ The blocks child of the ucd describes the blocks. It has one child block element per block, with attributes to describe the extent and name of the block. - + @@ -1216,7 +1216,7 @@ Similarly, the provisional-named-sequences child of the ucd describes the provisional named sequences. - + @@ -1227,7 +1227,7 @@ attributes to describe the code point affected, its old normalization, its new normalization and the version of Unicode in which the correction was made. - + @@ -1238,7 +1238,7 @@ last element capture the variation sequence, the description of the desired appearance, and the shaping environment under which the appearance is different. - + @@ -1248,7 +1248,7 @@ child element cjk-radical per radical. The attributes on that last element capture the radical number, the corresponding CJK radical character, and the corresponding CJK unified ideograph. - + @@ -1256,9 +1256,9 @@ Emoji sources The emoji-sources child of the ucd describes the emoji sources. - + - + @@ -1268,7 +1268,7 @@ character sequences that should not be emitted or generated in newly authored texts. - + diff --git a/uax/uax42/index2html.xsl b/unicodetools/src/main/resources/org/unicode/uax42/index2html.xsl similarity index 100% rename from uax/uax42/index2html.xsl rename to unicodetools/src/main/resources/org/unicode/uax42/index2html.xsl diff --git a/uax/uax42/index2rnc.xsl b/unicodetools/src/main/resources/org/unicode/uax42/index2rnc.xsl similarity index 100% rename from uax/uax42/index2rnc.xsl rename to unicodetools/src/main/resources/org/unicode/uax42/index2rnc.xsl diff --git a/uax/uax42/output/index.html b/unicodetools/src/main/resources/org/unicode/uax42/output/index.html similarity index 99% rename from uax/uax42/output/index.html rename to unicodetools/src/main/resources/org/unicode/uax42/output/index.html index ccde1ac04..13bf8181d 100644 --- a/uax/uax42/output/index.html +++ b/unicodetools/src/main/resources/org/unicode/uax42/output/index.html @@ -646,6 +646,7 @@

| "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }? @@ -1368,7 +1369,8 @@

| "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/output/index.rnc b/unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc similarity index 99% rename from uax/uax42/output/index.rnc rename to unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc index 7cdf380f3..84d9b5875 100644 --- a/uax/uax42/output/index.rnc +++ b/unicodetools/src/main/resources/org/unicode/uax42/output/index.rnc @@ -79,6 +79,7 @@ | "14.0" | "15.0" | "15.1" | "16.0" + | "17.0" | "unassigned" }? @@ -553,7 +554,8 @@ | "Hah" | "Hanifi_Rohingya_Kinna_Ya" | "Hanifi_Rohingya_Pa" | "He" | "Heh" | "Heh_Goal" | "Heth" - | "Kaf" | "Kaph" | "Khaph" | "Knotted_Heh" + | "Kaf" | "Kaph" | "Kashmiri_Yeh" | "Khaph" + | "Knotted_Heh" | "Lam" | "Lamadh" | "Malayalam_Bha" | "Malayalam_Ja" | "Malayalam_Lla" | "Malayalam_Llla" | "Malayalam_Nga" diff --git a/uax/uax42/pom.xml b/unicodetools/src/main/resources/org/unicode/uax42/pom.xml similarity index 94% rename from uax/uax42/pom.xml rename to unicodetools/src/main/resources/org/unicode/uax42/pom.xml index c18d2f3d0..9ae81d56f 100644 --- a/uax/uax42/pom.xml +++ b/unicodetools/src/main/resources/org/unicode/uax42/pom.xml @@ -35,7 +35,7 @@ index.xml index2html.xsl - ${project.basedir}/output/ + ${outputdir} .html @@ -49,7 +49,7 @@ index.xml index2rnc.xsl - ${project.basedir}/output/ + ${outputdir} .rnc