From 5986dfa6e6cc72ab32269044bb09d06409a12efe Mon Sep 17 00:00:00 2001 From: John Wilcock Date: Thu, 6 Jun 2024 14:31:05 -0700 Subject: [PATCH] Initial checkin for UcdXML --- .gitignore | 1 + .../org/unicode/xml/AttributeResolver.java | 404 ++++++++ .../java/org/unicode/xml/UCDDataResolver.java | 194 ++++ .../java/org/unicode/xml/UCDXMLWriter.java | 78 ++ .../org/unicode/xml/UcdPropertyDetail.java | 923 ++++++++++++++++++ .../org/unicode/xml/UcdSectionComponent.java | 28 + .../org/unicode/xml/UcdSectionDetail.java | 153 +++ .../src/main/java/org/unicode/xml/UcdXML.java | 362 +++++++ 8 files changed, 2143 insertions(+) create mode 100644 unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdPropertyDetail.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java create mode 100644 unicodetools/src/main/java/org/unicode/xml/UcdXML.java diff --git a/.gitignore b/.gitignore index 60e7ec63ef..c6d5a34bd2 100644 --- a/.gitignore +++ b/.gitignore @@ -43,6 +43,7 @@ perf-*.xml test-*.xml # Directories +.idea/ .settings/ .vs/ .vscode/ diff --git a/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java new file mode 100644 index 0000000000..b5d0936357 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java @@ -0,0 +1,404 @@ +package org.unicode.xml; + +import com.ibm.icu.dev.util.UnicodeMap; +import com.ibm.icu.util.VersionInfo; +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.props.*; + +import java.util.*; + +public class AttributeResolver { + + private final IndexUnicodeProperties 
indexUnicodeProperties; + private final UnicodeMap map_age; + private final UnicodeMap map_bidi_class; + private final UnicodeMap map_bidi_paired_bracket_type; + private final UnicodeMap map_block; + private final UnicodeMap map_canonical_combining_class; + private final UnicodeMap map_decomposition_type; + private final UnicodeMap map_do_not_emit_type; + private final UnicodeMap map_east_asian_width; + private final UnicodeMap map_general_category; + private final UnicodeMap map_grapheme_cluster_break; + private final UnicodeMap map_hangul_syllable_type; + private final UnicodeMap map_identifier_status; + private final UnicodeMap map_identifier_type; + private final UnicodeMap map_idn_2008; + private final UnicodeMap map_idn_status; + private final UnicodeMap map_indic_conjunct_break; + private final UnicodeMap map_indic_positional_category; + private final UnicodeMap map_indic_syllabic_category; + private final UnicodeMap map_jamo_short_name; + private final UnicodeMap map_joining_group; + private final UnicodeMap map_joining_type; + private final UnicodeMap map_line_break; + private final UnicodeMap map_nfc_quick_check; + private final UnicodeMap map_nfd_quick_check; + private final UnicodeMap map_nfkc_quick_check; + private final UnicodeMap map_nfkd_quick_check; + private final UnicodeMap map_numeric_type; + private final UnicodeMap map_other_joining_type; + private final UnicodeMap map_script; + private final UnicodeMap map_script_extensions; + private final UnicodeMap map_sentence_break; + private final UnicodeMap map_vertical_orientation; + private final UnicodeMap map_word_break; + private final HashMap> map_NameAlias; + + //If there is a change in any of these properties between two adjacent characters, it will result in a new range. 
+ private final UcdProperty[] rangeDefiningProperties = { + UcdProperty.Age, + UcdProperty.Bidi_Class, + UcdProperty.Block, + UcdProperty.Decomposition_Mapping, + UcdProperty.Numeric_Type, + UcdProperty.Numeric_Value, + UcdProperty.Vertical_Orientation + }; + + public AttributeResolver(IndexUnicodeProperties iup) { + indexUnicodeProperties = iup; + map_age = indexUnicodeProperties.loadEnum(UcdProperty.Age); + map_bidi_class = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Class); + map_bidi_paired_bracket_type = indexUnicodeProperties.loadEnum(UcdProperty.Bidi_Paired_Bracket_Type); + map_block = indexUnicodeProperties.loadEnum(UcdProperty.Block); + map_canonical_combining_class = indexUnicodeProperties.loadEnum(UcdProperty.Canonical_Combining_Class); + map_decomposition_type = indexUnicodeProperties.loadEnum(UcdProperty.Decomposition_Type); + map_do_not_emit_type = indexUnicodeProperties.loadEnum(UcdProperty.Do_Not_Emit_Type); + map_east_asian_width = indexUnicodeProperties.loadEnum(UcdProperty.East_Asian_Width); + map_general_category = indexUnicodeProperties.loadEnum(UcdProperty.General_Category); + map_grapheme_cluster_break = indexUnicodeProperties.loadEnum(UcdProperty.Grapheme_Cluster_Break); + map_hangul_syllable_type = indexUnicodeProperties.loadEnum(UcdProperty.Hangul_Syllable_Type); + map_identifier_status = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Status); + map_identifier_type = indexUnicodeProperties.loadEnum(UcdProperty.Identifier_Type); + map_idn_2008 = indexUnicodeProperties.loadEnum(UcdProperty.Idn_2008); + map_idn_status = indexUnicodeProperties.loadEnum(UcdProperty.Idn_Status); + map_indic_conjunct_break = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Conjunct_Break); + map_indic_positional_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Positional_Category); + map_indic_syllabic_category = indexUnicodeProperties.loadEnum(UcdProperty.Indic_Syllabic_Category); + map_jamo_short_name = 
indexUnicodeProperties.loadEnum(UcdProperty.Jamo_Short_Name); + map_joining_group = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Group); + map_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Joining_Type); + map_line_break = indexUnicodeProperties.loadEnum(UcdProperty.Line_Break); + map_nfc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFC_Quick_Check); + map_nfd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFD_Quick_Check); + map_nfkc_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKC_Quick_Check); + map_nfkd_quick_check = indexUnicodeProperties.loadEnum(UcdProperty.NFKD_Quick_Check); + map_numeric_type = indexUnicodeProperties.loadEnum(UcdProperty.Numeric_Type); + map_other_joining_type = indexUnicodeProperties.loadEnum(UcdProperty.Other_Joining_Type); + map_script = indexUnicodeProperties.loadEnum(UcdProperty.Script); + map_script_extensions = indexUnicodeProperties.getProperty(UcdProperty.Script_Extensions).getUnicodeMap(); + map_sentence_break = indexUnicodeProperties.loadEnum(UcdProperty.Sentence_Break); + map_vertical_orientation = indexUnicodeProperties.loadEnum(UcdProperty.Vertical_Orientation); + map_word_break = indexUnicodeProperties.loadEnum(UcdProperty.Word_Break); + + //UCD code is only set up to read a single Alias value from NameAliases.txt + //Instead, we'll load the Alias and the Type data as part of the constructor. We'll keep in memory as it + //NameAliases isn't too large. 
+ map_NameAlias = loadNameAliases(); + } + + private enum AliasType { + ABBREVIATION ("abbreviation"), + ALTERNATE ("alternate"), + CONTROL ("control"), + CORRECTION ("correction"), + FIGMENT ("figment"); + + private final String aliasType; + + AliasType(String aliasType) { + this.aliasType = aliasType; + } + + public String toString() { + return aliasType; + } + } + + private static class NameAlias { + + private String alias; + private final AliasType type; + + private NameAlias(String alias, AliasType type) { + this.alias = alias; + this.type = type; + } + + public String getAlias() { + return alias; + } + public AliasType getType() { + return type; + } + + } + + private static class NameAliasComparator implements java.util.Comparator { + + @Override + public int compare(NameAlias o1, NameAlias o2) { + return o1.getAlias().compareTo(o2.getAlias()); + } + } + + private HashMap> loadNameAliases() { + HashMap> nameAliasesByCodepoint = new HashMap<>(); + final PropertyParsingInfo fileInfo = PropertyParsingInfo.getPropertyInfo(UcdProperty.Name_Alias); + String fullFilename = fileInfo.getFullFileName(indexUnicodeProperties.getUcdVersion()); + UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); + NameAliasComparator nameAliasComparator = new NameAliasComparator(); + + for (UcdLineParser.UcdLine line : parser) { + String[] parts = line.getParts(); + int codepoint = Integer.parseInt(parts[0], 16); + NameAlias nameAlias = new NameAlias( + parts[1], AliasType.valueOf(parts[2].toUpperCase())); + + if (nameAliasesByCodepoint.containsKey(codepoint)) { + LinkedList nameAliases = new LinkedList<>(nameAliasesByCodepoint.get(codepoint)); + nameAliases.add(nameAlias); + nameAliases.sort(nameAliasComparator); + nameAliasesByCodepoint.replace(codepoint, nameAliases); + } + else { + nameAliasesByCodepoint.put(codepoint, new LinkedList<>(List.of(nameAlias))); + } + } + return nameAliasesByCodepoint; + } + + public String getAttributeValue(UcdProperty prop, int 
codepoint) { + String resolvedValue = indexUnicodeProperties.getResolvedValue(prop, codepoint); + switch(prop.getType()) { + case Numeric: + switch(prop) { + case kOtherNumeric: + case kPrimaryNumeric: + case kAccountingNumeric: + return (resolvedValue.equals("NaN")) ? null : resolvedValue; + default: + return Optional.ofNullable(resolvedValue).orElse("NaN"); + } + case String: + switch(prop) { + case Equivalent_Unified_Ideograph: + String EqUIdeo = getMappingValue(codepoint, resolvedValue, false, ""); + return (EqUIdeo.equals("#")) ? null : EqUIdeo; + case kCompatibilityVariant: + String kCompatibilityVariant = getMappingValue(codepoint, resolvedValue, false, "U+"); + return (kCompatibilityVariant.equals("#")) ? "" : kCompatibilityVariant; + case kSimplifiedVariant: + case kTraditionalVariant: + String kVariant = getMappingValue(codepoint, resolvedValue, isUnihanAttributeRange(codepoint), "U+"); + return (kVariant.equals("#")) ? "" : kVariant; + case Bidi_Mirroring_Glyph: + //TODO: Question for PAG - This is probably not the desired behavior, but adding this case to maintain consistent output. + // Check the spec. But otherwise keep consistent. Update this comment to indicate why. + String bmg = getMappingValue(codepoint, resolvedValue, false, ""); + return (bmg.equals("#")) ? 
"" : bmg; + default: + return getMappingValue(codepoint, resolvedValue, false, ""); + } + case Miscellaneous: + switch(prop) { + case Jamo_Short_Name: + //return map_jamo_short_name.get(codepoint).getShortName(); + return Optional.ofNullable(resolvedValue).orElse(""); + case Name: + if(resolvedValue != null && resolvedValue.startsWith("CJK UNIFIED IDEOGRAPH-")) { + return "CJK UNIFIED IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("CJK COMPATIBILITY IDEOGRAPH-")) { + return "CJK COMPATIBILITY IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("TANGUT IDEOGRAPH-")) { + return "TANGUT IDEOGRAPH-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("KHITAN SMALL SCRIPT CHARACTER-")) { + return "KHITAN SMALL SCRIPT CHARACTER-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("NUSHU CHARACTER-")) { + return "NUSHU CHARACTER-#"; + } + if(resolvedValue != null && resolvedValue.startsWith("EGYPTIAN HIEROGLYPH-")) { + return "EGYPTIAN HIEROGLYPH-#"; + } + return Optional.ofNullable(resolvedValue).orElse(""); + case kDefinition: + return resolvedValue; + default: + if (resolvedValue!= null) { + return resolvedValue.replaceAll("\\|", " "); + } + return ""; + } + case Catalog: + switch(prop) { + case Age: + String age = map_age.get(codepoint).getShortName(); + return (age.equals("NA")) ? 
"unassigned" : age; + case Block: + return map_block.get(codepoint).getShortName(); + case Script: + return map_script.get(codepoint).getShortName(); + case Script_Extensions: + StringBuilder extensionBuilder = new StringBuilder(); + String[] extensions = map_script_extensions.get(codepoint).split("\\|", 0); + for (String extension : extensions) { + extensionBuilder.append(UcdPropertyValues.Script_Values.valueOf(extension).getShortName()); + extensionBuilder.append(" "); + } + return extensionBuilder.toString().trim(); + default: + throw new RuntimeException("Missing Catalog case"); + } + case Enumerated: + switch(prop) { + case Bidi_Class: + return map_bidi_class.get(codepoint).getShortName(); + case Bidi_Paired_Bracket_Type: + return map_bidi_paired_bracket_type.get(codepoint).getShortName(); + case Canonical_Combining_Class: + return map_canonical_combining_class.get(codepoint).getShortName(); + case Decomposition_Type: + //TODO: Question for PAG - This is probably not the desired behavior, but specifying lower case to maintain consistent output. + // Check the spec. But otherwise keep consistent. Update this comment to indicate why. 
+ return map_decomposition_type.get(codepoint).getShortName().toLowerCase(); + case Do_Not_Emit_Type: + return map_do_not_emit_type.get(codepoint).getShortName(); + case East_Asian_Width: + return map_east_asian_width.get(codepoint).getShortName(); + case General_Category: + return map_general_category.get(codepoint).getShortName(); + case Grapheme_Cluster_Break: + return map_grapheme_cluster_break.get(codepoint).getShortName(); + case Hangul_Syllable_Type: + return map_hangul_syllable_type.get(codepoint).getShortName(); + case Identifier_Status: + return map_identifier_status.get(codepoint).getShortName(); + case Identifier_Type: + return map_identifier_type.get(codepoint).getShortName(); + case Idn_2008: + return map_idn_2008.get(codepoint).getShortName(); + case Idn_Status: + return map_idn_status.get(codepoint).getShortName(); + case Indic_Conjunct_Break: + return map_indic_conjunct_break.get(codepoint).getShortName(); + case Indic_Positional_Category: + return map_indic_positional_category.get(codepoint).getShortName(); + case Indic_Syllabic_Category: + return map_indic_syllabic_category.get(codepoint).getShortName(); + case Joining_Group: + return map_joining_group.get(codepoint).getShortName(); + case Joining_Type: + return map_joining_type.get(codepoint).getShortName(); + case Line_Break: + return map_line_break.get(codepoint).getShortName(); + case NFC_Quick_Check: + return map_nfc_quick_check.get(codepoint).getShortName(); + case NFD_Quick_Check: + return map_nfd_quick_check.get(codepoint).getShortName(); + case NFKC_Quick_Check: + return map_nfkc_quick_check.get(codepoint).getShortName(); + case NFKD_Quick_Check: + return map_nfkd_quick_check.get(codepoint).getShortName(); + case Numeric_Type: + return map_numeric_type.get(codepoint).getShortName(); + case Other_Joining_Type: + return map_other_joining_type.get(codepoint).getShortName(); + case Sentence_Break: + return map_sentence_break.get(codepoint).getShortName(); + case Vertical_Orientation: + 
return map_vertical_orientation.get(codepoint).getShortName(); + case Word_Break: + return map_word_break.get(codepoint).getShortName(); + default: + throw new RuntimeException("Missing Enumerated case"); + } + case Binary: + { + switch(resolvedValue) { + // Seems overkill to get this from UcdPropertyValues.Binary + case "No": + return "N"; + case "Yes": + return "Y"; + default: + throw new RuntimeException("Unexpected Binary value"); + } + } + default: + throw new RuntimeException("Missing PropertyType case"); + } + } + + public boolean isUnassignedCodepoint(int codepoint) { + return UcdPropertyValues.General_Category_Values.Unassigned.equals(getgc(codepoint)) || + UcdPropertyValues.General_Category_Values.Private_Use.equals(getgc(codepoint)) || + UcdPropertyValues.General_Category_Values.Surrogate.equals(getgc(codepoint)); + } + + public UcdPropertyValues.General_Category_Values getgc(int codepoint) { + return map_general_category.get(codepoint); + } + + public String getNChar(int codepoint) { + return getAttributeValue(UcdProperty.Noncharacter_Code_Point, codepoint); + } + + public HashMap getNameAliases(int codepoint) { + HashMap nameAliases = new LinkedHashMap<>(); + LinkedList nameAliasList = map_NameAlias.get(codepoint); + if (null != nameAliasList && !nameAliasList.isEmpty()) { + for (NameAlias nameAlias : nameAliasList) { + nameAliases.put(nameAlias.getAlias(), nameAlias.getType().toString()); + } + return nameAliases; + } + return null; + } + + private String getMappingValue(int codepoint, String resolvedValue, boolean ignoreUnihanRange, String prefix) { + if (null == resolvedValue) { + return "#"; + } + int[] resolvedValueInts = resolvedValue.codePoints().toArray(); + if (resolvedValueInts.length == 1 && resolvedValueInts[0] == codepoint && !ignoreUnihanRange) { + return "#"; + } + StringBuilder sb = new StringBuilder(); + for (int i : resolvedValueInts) { + sb.append(prefix).append(getCPString(i)).append(" "); + } + return sb.toString().trim(); + } + + 
public boolean isDifferentRange(int codepointA, int codepointB) { + boolean isDifference = false; + for (UcdProperty property : rangeDefiningProperties) { + isDifference = isDifference || + !getAttributeValue(property, codepointA).equals(getAttributeValue(property, codepointB)); + } + return isDifference; + } + + private static String getCPString(int codepoint) { + return String.format("%4s", Integer.toHexString(codepoint)).replace(" ", "0").toUpperCase(); + } + + public String getHexString(int codepoint) { + return getCPString(codepoint); + } + + public boolean isUnihanAttributeRange(int codepoint) { + return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") || + !getAttributeValue(UcdProperty.kCompatibilityVariant, codepoint).isEmpty(); + } + + public boolean isUnifiedIdeograph(int codepoint) { + return getAttributeValue(UcdProperty.Unified_Ideograph, codepoint).equals("Y") && + getAttributeValue(UcdProperty.Name, codepoint).equals("CJK UNIFIED IDEOGRAPH-#"); + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java new file mode 100644 index 0000000000..888dedc0a1 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java @@ -0,0 +1,194 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.cldr.draft.FileUtilities; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.PropertyParsingInfo; +import org.unicode.props.UcdLineParser; +import org.unicode.props.UcdProperty; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import java.util.*; + +public class UCDDataResolver { + + private final IndexUnicodeProperties indexUnicodeProperties; + private final String namespace; + private final UCDXMLWriter writer; + + public UCDDataResolver(IndexUnicodeProperties iup, String namespace, UCDXMLWriter writer) { + indexUnicodeProperties = iup; + 
this.namespace = namespace; + this.writer = writer; + } + + public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXException { + VersionInfo minVersion = ucdSection.getMinVersion(); + VersionInfo maxVersion = ucdSection.getMaxVersion(); + String tag = ucdSection.toString(); + String childTag = ucdSection.getChildTag(); + boolean parserWithRange = ucdSection.getParserWithRange(); + boolean parserWithMissing = ucdSection.getParserWithMissing(); + UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents(); + + if (isCompatibleVersion(minVersion, maxVersion)) { + writer.startElement(tag); { + for (UcdSectionComponent ucdSectionComponent : ucdSectionComponents) { + if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) { + final PropertyParsingInfo fileInfoEVS = PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); + String fullFilename = fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); + UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); + parser.withRange(parserWithRange); + parser.withMissing(parserWithMissing); + switch (ucdSection) { + case BLOCKS: + for (UcdLineParser.UcdLine line : parser) { + if (!line.getOriginalLine().startsWith("#")) { + AttributesImpl attributes = getBlockAttributes(namespace, line); + writer.startElement(childTag, attributes); { + writer.endElement(childTag); + } + } + } + break; + case NAMEDSEQUENCES: + HashMap namedSequences = new HashMap<>(); + for (UcdLineParser.UcdLine line : parser) { + String[] parts = line.getParts(); + namedSequences.put(parts[0], parts[1]); + } + List names = new ArrayList<>(namedSequences.keySet()); + Collections.sort(names); + for (String name : names) { + AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, namedSequences); + writer.startElement(childTag, attributes); { + writer.endElement(childTag); + } + } + break; + 
default: + for (UcdLineParser.UcdLine line : parser) { + AttributesImpl attributes = getAttributes(ucdSection, namespace, line); + writer.startElement(childTag, attributes); + { + writer.endElement(childTag); + } + } + } + } + } + writer.endElement(tag); + } + } + } + + private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) { + switch(ucdSection) { + case CJKRADICALS: + return getCJKRadicalAttributes(namespace, line); + case DONOTEMIT: + return getDoNotEmitAttributes(namespace, line); + case EMOJISOURCES: + return getEmojiSourceAttributes(namespace, line); + case NORMALIZATIONCORRECTIONS: + return getNCAttributes(namespace, line); + case STANDARDIZEDVARIANTS: + return getSVAttributes(namespace, line); + default: + throw new IllegalArgumentException("getAttributes failed on an unexpected UcdSection"); + } + } + + private static AttributesImpl getBlockAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + String[] range = parts[0].split("\\.\\."); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "first-cp", "first-cp", "CDATA", range[0]); + attributes.addAttribute( + namespace, "last-cp", "last-cp", "CDATA", range[1]); + attributes.addAttribute( + namespace, "name", "name", "CDATA", parts[1]); + return attributes; + } + + private static AttributesImpl getCJKRadicalAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "number", "number", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "radical", "radical", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "ideograph", "ideograph", "CDATA", parts[2]); + return attributes; + } + + private static AttributesImpl getDoNotEmitAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + 
AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "of", "of", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "use", "use", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "because", "because", "CDATA", parts[2]); + return attributes; + } + + private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "unicode", "unicode", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "docomo", "docomo", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "kddi", "kddi", "CDATA", parts[2]); + attributes.addAttribute( + namespace, "softbank", "softbank", "CDATA", parts[3]); + return attributes; + } + + private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, HashMap namedSequences) { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "name", "name", "CDATA", name); + attributes.addAttribute( + namespace, "cps", "cps", "CDATA", namedSequences.get(name)); + return attributes; + } + + private static AttributesImpl getNCAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "cp", "cp", "CDATA", parts[0]); + attributes.addAttribute( + namespace, "old", "old", "CDATA", parts[1]); + attributes.addAttribute( + namespace, "new", "new", "CDATA", parts[2]); + attributes.addAttribute( + namespace, "version", "version", "CDATA", parts[3]); + return attributes; + } + + private static AttributesImpl getSVAttributes(String namespace, UcdLineParser.UcdLine line) { + String[] parts = line.getParts(); + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + namespace, "cps", "cps", "CDATA", parts[0]); + 
/**
 * Thin wrapper around a SAX {@link TransformerHandler} that serializes UCDXML as indented UTF-8
 * XML, placing every element in {@link #NAMESPACE}.
 */
public class UCDXMLWriter {

    public static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0";

    /** Year written by the no-argument {@link #startFile()}. */
    private static final int DEFAULT_COPYRIGHT_YEAR = 2023;

    private final TransformerHandler transformerHandler;

    /** Returns the underlying handler, for callers that need to emit SAX events directly. */
    public TransformerHandler getTransformerHandler() {
        return transformerHandler;
    }

    /**
     * Creates a writer that serializes to {@code f}. The stream is not closed by this class.
     *
     * @param f destination of the XML bytes
     * @throws TransformerConfigurationException if the platform transformer factory cannot
     *     produce SAX transformer handlers
     */
    public UCDXMLWriter(FileOutputStream f) throws TransformerConfigurationException {
        TransformerFactory tfactory = TransformerFactory.newInstance();
        // Only factories advertising this feature may be cast to SAXTransformerFactory; check it
        // so an unsupported factory fails with a clear message instead of a ClassCastException.
        if (!tfactory.getFeature(SAXTransformerFactory.FEATURE)) {
            throw new TransformerConfigurationException(
                    "TransformerFactory does not support SAXTransformerFactory: "
                            + tfactory.getClass().getName());
        }
        SAXTransformerFactory sfactory = (SAXTransformerFactory) tfactory;
        transformerHandler = sfactory.newTransformerHandler();
        Transformer transformer = transformerHandler.getTransformer();
        transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
        // The indent amount is an implementation-specific property, so it is set under both
        // namespaces that are in use for it.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
        transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "3");
        transformerHandler.setResult(new StreamResult(f));
    }

    /**
     * Starts the document and writes the copyright and terms-of-use comments, using the default
     * copyright year.
     *
     * @throws SAXException if the handler rejects an event
     */
    public void startFile() throws SAXException {
        startFile(DEFAULT_COPYRIGHT_YEAR);
    }

    /**
     * Starts the document and writes the copyright and terms-of-use comments.
     *
     * @param copyrightYear year to print in the copyright comment
     * @throws SAXException if the handler rejects an event
     */
    public void startFile(int copyrightYear) throws SAXException {
        transformerHandler.startDocument();
        addContent("\n");
        writeComment(" \u00A9 " + copyrightYear + " Unicode\u00AE, Inc. ");
        addContent("\n");
        writeComment(" For terms of use, see http://www.unicode.org/terms_of_use.html ");
        addContent("\n\n\n");
    }

    /**
     * Ends the document.
     *
     * @throws SAXException if the handler rejects the event
     */
    public void endFile() throws SAXException {
        transformerHandler.endDocument();
    }

    /**
     * Opens an element without attributes.
     *
     * @param tagName local and qualified name of the element
     * @throws SAXException if the handler rejects the event
     */
    public void startElement(String tagName) throws SAXException {
        startElement(tagName, new AttributesImpl());
    }

    /**
     * Opens an element in {@link #NAMESPACE} with the given attributes.
     *
     * @param tagName local and qualified name of the element
     * @param attributes attributes of the element
     * @throws SAXException if the handler rejects the event
     */
    public void startElement(String tagName, AttributesImpl attributes) throws SAXException {
        transformerHandler.startElement(NAMESPACE, tagName, tagName, attributes);
    }

    /**
     * Writes character content at the current position.
     *
     * @param s text to write
     * @throws SAXException if the handler rejects the event
     */
    public void addContent(String s) throws SAXException {
        char[] chars = s.toCharArray();
        transformerHandler.characters(chars, 0, chars.length);
    }

    /**
     * Closes the element opened by the matching {@code startElement} call.
     *
     * @param tagName local and qualified name of the element
     * @throws SAXException if the handler rejects the event
     */
    public void endElement(String tagName) throws SAXException {
        transformerHandler.endElement(NAMESPACE, tagName, tagName);
    }

    /** Writes an XML comment containing exactly {@code text}. */
    private void writeComment(String text) throws SAXException {
        char[] chars = text.toCharArray();
        transformerHandler.comment(chars, 0, chars.length);
    }
}
org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +import java.util.LinkedHashSet; +import java.util.Set; + +public class UcdPropertyDetail { + + static private LinkedHashSet basePropertyDetails = new LinkedHashSet (); + static private LinkedHashSet cjkPropertyDetails = new LinkedHashSet (); + static private LinkedHashSet ucdxmlPropertyDetails = new LinkedHashSet (); + static private LinkedHashSet allPropertyDetails = new LinkedHashSet (); + + public static UcdPropertyDetail Age_Detail = new UcdPropertyDetail ( + UcdProperty.Age, VersionInfo.getInstance(1,1,0), 1, + true, false, false, true); + public static UcdPropertyDetail Name_Detail = new UcdPropertyDetail ( + UcdProperty.Name, VersionInfo.getInstance(1,1,0), 2, + true, false, false, true); + public static UcdPropertyDetail Jamo_Short_Name_Detail = new UcdPropertyDetail ( + UcdProperty.Jamo_Short_Name, VersionInfo.getInstance(1,1,0), 3, + true, false, false, true); + public static UcdPropertyDetail General_Category_Detail = new UcdPropertyDetail ( + UcdProperty.General_Category, VersionInfo.getInstance(1,1,0), 4, + true, false, false, true); + public static UcdPropertyDetail Canonical_Combining_Class_Detail = new UcdPropertyDetail ( + UcdProperty.Canonical_Combining_Class, VersionInfo.getInstance(1,1,0), 5, + true, false, false, true); + public static UcdPropertyDetail Decomposition_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Decomposition_Type, VersionInfo.getInstance(1,1,0), 6, + true, false, false, true); + public static UcdPropertyDetail Decomposition_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Decomposition_Mapping, VersionInfo.getInstance(1,1,0), 7, + true, false, false, true); + public static UcdPropertyDetail Numeric_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Numeric_Type, VersionInfo.getInstance(1,1,0), 8, + true, false, false, true); + public static UcdPropertyDetail Numeric_Value_Detail = new UcdPropertyDetail ( + 
UcdProperty.Numeric_Value, VersionInfo.getInstance(1,1,0), 9, + true, false, false, true); + public static UcdPropertyDetail Bidi_Class_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Class, VersionInfo.getInstance(1,1,0), 10, + true, false, false, true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Paired_Bracket_Type, VersionInfo.getInstance(6,3,0), 11, + true, false, false, true); + public static UcdPropertyDetail Bidi_Paired_Bracket_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Paired_Bracket, VersionInfo.getInstance(6,3,0), 12, + true, false, false, true); + public static UcdPropertyDetail Bidi_Mirrored_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Mirrored, VersionInfo.getInstance(1,1,0), 13, + true, false, false, true); + public static UcdPropertyDetail Bidi_Mirroring_Glyph_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Mirroring_Glyph, VersionInfo.getInstance(1,1,0), 14, + true, false, false, true); + public static UcdPropertyDetail Simple_Uppercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 15, + true, false, false, true); + public static UcdPropertyDetail Simple_Lowercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 16, + true, false, false, true); + public static UcdPropertyDetail Simple_Titlecase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 17, + true, false, false, true); + public static UcdPropertyDetail Uppercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Uppercase_Mapping, VersionInfo.getInstance(1,1,0), 18, + true, false, false, true); + public static UcdPropertyDetail Lowercase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Lowercase_Mapping, VersionInfo.getInstance(1,1,0), 19, + true, false, false, true); + public static UcdPropertyDetail 
Titlecase_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Titlecase_Mapping, VersionInfo.getInstance(1,1,0), 20, + true, false, false, true); +// public static UcdPropertyDetail Special_Case_Condition_Detail = new UcdPropertyDetail ( +// UcdProperty.Special_Case_Condition, VersionInfo.getInstance(1,1,0), 21, +// true, false, false, true); + /* Details for Simple_Case_Folding through White_Space; final so these shared instances cannot be reassigned. The disabled Special_Case_Condition entry above keeps integer 21, so the live entries skip it. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Simple_Case_Folding_Detail = new UcdPropertyDetail ( + UcdProperty.Simple_Case_Folding, VersionInfo.getInstance(1,1,0), 22, + true, false, false, true); + public static final UcdPropertyDetail Case_Folding_Detail = new UcdPropertyDetail ( + UcdProperty.Case_Folding, VersionInfo.getInstance(1,1,0), 23, + true, false, false, true); + public static final UcdPropertyDetail Joining_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Joining_Type, VersionInfo.getInstance(1,1,0), 24, + true, false, false, true); + public static final UcdPropertyDetail Joining_Group_Detail = new UcdPropertyDetail ( + UcdProperty.Joining_Group, VersionInfo.getInstance(1,1,0), 25, + true, false, false, true); + public static final UcdPropertyDetail East_Asian_Width_Detail = new UcdPropertyDetail ( + UcdProperty.East_Asian_Width, VersionInfo.getInstance(1,1,0), 26, + true, false, false, true); + public static final UcdPropertyDetail Line_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Line_Break, VersionInfo.getInstance(1,1,0), 27, + true, false, false, true); + public static final UcdPropertyDetail Script_Detail = new UcdPropertyDetail ( + UcdProperty.Script, VersionInfo.getInstance(1,1,0), 28, + true, false, false, true); + public static final UcdPropertyDetail Script_Extensions_Detail = new UcdPropertyDetail ( + UcdProperty.Script_Extensions, VersionInfo.getInstance(6,1,0), 29, + true, false, false, true); + public static final UcdPropertyDetail Dash_Detail = new UcdPropertyDetail ( + UcdProperty.Dash, VersionInfo.getInstance(1,1,0), 30, + true, false, false, true); + public static final UcdPropertyDetail White_Space_Detail = new UcdPropertyDetail ( + 
UcdProperty.White_Space, VersionInfo.getInstance(1,1,0), 31, + true, false, false, true); + /* Details for Hyphen through Alphabetic; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Hyphen_Detail = new UcdPropertyDetail ( + UcdProperty.Hyphen, VersionInfo.getInstance(1,1,0), 32, + true, false, false, true); + public static final UcdPropertyDetail Quotation_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Quotation_Mark, VersionInfo.getInstance(1,1,0), 33, + true, false, false, true); + public static final UcdPropertyDetail Radical_Detail = new UcdPropertyDetail ( + UcdProperty.Radical, VersionInfo.getInstance(1,1,0), 34, + true, false, false, true); + public static final UcdPropertyDetail Ideographic_Detail = new UcdPropertyDetail ( + UcdProperty.Ideographic, VersionInfo.getInstance(1,1,0), 35, + true, false, false, true); + public static final UcdPropertyDetail Unified_Ideograph_Detail = new UcdPropertyDetail ( + UcdProperty.Unified_Ideograph, VersionInfo.getInstance(1,1,0), 36, + true, false, false, true); + public static final UcdPropertyDetail IDS_Binary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Binary_Operator, VersionInfo.getInstance(1,1,0), 37, + true, false, false, true); + public static final UcdPropertyDetail IDS_Trinary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Trinary_Operator, VersionInfo.getInstance(1,1,0), 38, + true, false, false, true); + public static final UcdPropertyDetail Hangul_Syllable_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Hangul_Syllable_Type, VersionInfo.getInstance(1,1,0), 39, + true, false, false, true); + public static final UcdPropertyDetail Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 40, + true, false, false, true); + public static final UcdPropertyDetail Other_Default_Ignorable_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Default_Ignorable_Code_Point, VersionInfo.getInstance(1,1,0), 41, + true, false, false, true); + public static final UcdPropertyDetail Alphabetic_Detail = new 
UcdPropertyDetail ( + UcdProperty.Alphabetic, VersionInfo.getInstance(1,1,0), 42, + true, false, false, true); + /* Details for Other_Alphabetic through Variation_Selector; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Other_Alphabetic_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Alphabetic, VersionInfo.getInstance(1,1,0), 43, + true, false, false, true); + public static final UcdPropertyDetail Uppercase_Detail = new UcdPropertyDetail ( + UcdProperty.Uppercase, VersionInfo.getInstance(1,1,0), 44, + true, false, false, true); + public static final UcdPropertyDetail Other_Uppercase_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Uppercase, VersionInfo.getInstance(1,1,0), 45, + true, false, false, true); + public static final UcdPropertyDetail Lowercase_Detail = new UcdPropertyDetail ( + UcdProperty.Lowercase, VersionInfo.getInstance(1,1,0), 46, + true, false, false, true); + public static final UcdPropertyDetail Other_Lowercase_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Lowercase, VersionInfo.getInstance(1,1,0), 47, + true, false, false, true); + public static final UcdPropertyDetail Math_Detail = new UcdPropertyDetail ( + UcdProperty.Math, VersionInfo.getInstance(1,1,0), 48, + true, false, false, true); + public static final UcdPropertyDetail Other_Math_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Math, VersionInfo.getInstance(1,1,0), 49, + true, false, false, true); + public static final UcdPropertyDetail Hex_Digit_Detail = new UcdPropertyDetail ( + UcdProperty.Hex_Digit, VersionInfo.getInstance(1,1,0), 50, + true, false, false, true); + public static final UcdPropertyDetail ASCII_Hex_Digit_Detail = new UcdPropertyDetail ( + UcdProperty.ASCII_Hex_Digit, VersionInfo.getInstance(1,1,0), 51, + true, false, false, true); + public static final UcdPropertyDetail Noncharacter_Code_Point_Detail = new UcdPropertyDetail ( + UcdProperty.Noncharacter_Code_Point, VersionInfo.getInstance(1,1,0), 52, + true, false, false, true); + public static final UcdPropertyDetail Variation_Selector_Detail = new UcdPropertyDetail ( + UcdProperty.Variation_Selector, 
VersionInfo.getInstance(1,1,0), 53, + true, false, false, true); + /* Details for Bidi_Control through Deprecated; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Bidi_Control_Detail = new UcdPropertyDetail ( + UcdProperty.Bidi_Control, VersionInfo.getInstance(1,1,0), 54, + true, false, false, true); + public static final UcdPropertyDetail Join_Control_Detail = new UcdPropertyDetail ( + UcdProperty.Join_Control, VersionInfo.getInstance(1,1,0), 55, + true, false, false, true); + public static final UcdPropertyDetail Grapheme_Base_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Base, VersionInfo.getInstance(1,1,0), 56, + true, false, false, true); + public static final UcdPropertyDetail Grapheme_Extend_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Extend, VersionInfo.getInstance(1,1,0), 57, + true, false, false, true); + public static final UcdPropertyDetail Other_Grapheme_Extend_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Grapheme_Extend, VersionInfo.getInstance(1,1,0), 58, + true, false, false, true); + public static final UcdPropertyDetail Grapheme_Link_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Link, VersionInfo.getInstance(1,1,0), 59, + true, false, false, true); + public static final UcdPropertyDetail Sentence_Terminal_Detail = new UcdPropertyDetail ( + UcdProperty.Sentence_Terminal, VersionInfo.getInstance(1,1,0), 60, + true, false, false, true); + public static final UcdPropertyDetail Extender_Detail = new UcdPropertyDetail ( + UcdProperty.Extender, VersionInfo.getInstance(1,1,0), 61, + true, false, false, true); + public static final UcdPropertyDetail Terminal_Punctuation_Detail = new UcdPropertyDetail ( + UcdProperty.Terminal_Punctuation, VersionInfo.getInstance(1,1,0), 62, + true, false, false, true); + public static final UcdPropertyDetail Diacritic_Detail = new UcdPropertyDetail ( + UcdProperty.Diacritic, VersionInfo.getInstance(1,1,0), 63, + true, false, false, true); + public static final UcdPropertyDetail Deprecated_Detail = new UcdPropertyDetail ( + UcdProperty.Deprecated, VersionInfo.getInstance(1,1,0), 64, + true, false, false, 
true); + /* Details for ID_Start through Grapheme_Cluster_Break; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail ID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Start, VersionInfo.getInstance(1,1,0), 65, + true, false, false, true); + public static final UcdPropertyDetail Other_ID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.Other_ID_Start, VersionInfo.getInstance(1,1,0), 66, + true, false, false, true); + public static final UcdPropertyDetail XID_Start_Detail = new UcdPropertyDetail ( + UcdProperty.XID_Start, VersionInfo.getInstance(1,1,0), 67, + true, false, false, true); + public static final UcdPropertyDetail ID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Continue, VersionInfo.getInstance(1,1,0), 68, + true, false, false, true); + public static final UcdPropertyDetail Other_ID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.Other_ID_Continue, VersionInfo.getInstance(1,1,0), 69, + true, false, false, true); + public static final UcdPropertyDetail XID_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.XID_Continue, VersionInfo.getInstance(1,1,0), 70, + true, false, false, true); + public static final UcdPropertyDetail Soft_Dotted_Detail = new UcdPropertyDetail ( + UcdProperty.Soft_Dotted, VersionInfo.getInstance(1,1,0), 71, + true, false, false, true); + public static final UcdPropertyDetail Logical_Order_Exception_Detail = new UcdPropertyDetail ( + UcdProperty.Logical_Order_Exception, VersionInfo.getInstance(1,1,0), 72, + true, false, false, true); + public static final UcdPropertyDetail Pattern_White_Space_Detail = new UcdPropertyDetail ( + UcdProperty.Pattern_White_Space, VersionInfo.getInstance(1,1,0), 73, + true, false, false, true); + public static final UcdPropertyDetail Pattern_Syntax_Detail = new UcdPropertyDetail ( + UcdProperty.Pattern_Syntax, VersionInfo.getInstance(1,1,0), 74, + true, false, false, true); + public static final UcdPropertyDetail Grapheme_Cluster_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Grapheme_Cluster_Break, VersionInfo.getInstance(1,1,0), 75, + true, false, false, true); + public static final UcdPropertyDetail 
Word_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Word_Break, VersionInfo.getInstance(1,1,0), 76, + true, false, false, true); + /* Details for Sentence_Break through Expands_On_NFKC; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Sentence_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Sentence_Break, VersionInfo.getInstance(1,1,0), 77, + true, false, false, true); + public static final UcdPropertyDetail Composition_Exclusion_Detail = new UcdPropertyDetail ( + UcdProperty.Composition_Exclusion, VersionInfo.getInstance(1,1,0), 78, + true, false, false, true); + public static final UcdPropertyDetail Full_Composition_Exclusion_Detail = new UcdPropertyDetail ( + UcdProperty.Full_Composition_Exclusion, VersionInfo.getInstance(1,1,0), 79, + true, false, false, true); + public static final UcdPropertyDetail NFC_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFC_Quick_Check, VersionInfo.getInstance(1,1,0), 80, + true, false, false, true); + public static final UcdPropertyDetail NFD_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFD_Quick_Check, VersionInfo.getInstance(1,1,0), 81, + true, false, false, true); + public static final UcdPropertyDetail NFKC_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Quick_Check, VersionInfo.getInstance(1,1,0), 82, + true, false, false, true); + public static final UcdPropertyDetail NFKD_Quick_Check_Detail = new UcdPropertyDetail ( + UcdProperty.NFKD_Quick_Check, VersionInfo.getInstance(1,1,0), 83, + true, false, false, true); + public static final UcdPropertyDetail Expands_On_NFC_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFC, VersionInfo.getInstance(1,1,0), 84, + true, false, false, true); + public static final UcdPropertyDetail Expands_On_NFD_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFD, VersionInfo.getInstance(1,1,0), 85, + true, false, false, true); + public static final UcdPropertyDetail Expands_On_NFKC_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFKC, VersionInfo.getInstance(1,1,0), 86, + true, false, false, true); + public static final UcdPropertyDetail 
Expands_On_NFKD_Detail = new UcdPropertyDetail ( + UcdProperty.Expands_On_NFKD, VersionInfo.getInstance(1,1,0), 87, + true, false, false, true); + /* Details for FC_NFKC_Closure through NFKC_Casefold; final so these shared instances cannot be reassigned. NOTE(review): the field FC_NFC_Closure_Detail wraps UcdProperty.FC_NFKC_Closure, so its name looks like a typo for FC_NFKC_Closure_Detail; the name is kept because callers may reference it. Argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail FC_NFC_Closure_Detail = new UcdPropertyDetail ( + UcdProperty.FC_NFKC_Closure, VersionInfo.getInstance(1,1,0), 88, + true, false, false, true); + public static final UcdPropertyDetail Case_Ignorable_Detail = new UcdPropertyDetail ( + UcdProperty.Case_Ignorable, VersionInfo.getInstance(5,2,0), 89, + true, false, false, true); + public static final UcdPropertyDetail Cased_Detail = new UcdPropertyDetail ( + UcdProperty.Cased, VersionInfo.getInstance(5,2,0), 90, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_CaseFolded_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Casefolded, VersionInfo.getInstance(5,2,0), 91, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_CaseMapped_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Casemapped, VersionInfo.getInstance(5,2,0), 92, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_NFKC_Casefolded_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_NFKC_Casefolded, VersionInfo.getInstance(5,2,0), 93, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_Lowercased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Lowercased, VersionInfo.getInstance(5,2,0), 94, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_Titlecased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Titlecased, VersionInfo.getInstance(5,2,0), 95, + true, false, false, true); + public static final UcdPropertyDetail Changes_When_Uppercased_Detail = new UcdPropertyDetail ( + UcdProperty.Changes_When_Uppercased, VersionInfo.getInstance(5,2,0), 96, + true, false, false, true); + public static final UcdPropertyDetail NFKC_Casefold_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Casefold, VersionInfo.getInstance(5,2,0), 97, + true, false, 
false, true); + /* Details for Indic_Syllabic_Category through kRSUnicode; final so these shared instances cannot be reassigned. The disabled Indic_Matra_Category entry keeps integer 99, so the live entries skip it. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail Indic_Syllabic_Category_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Syllabic_Category, VersionInfo.getInstance(6,0,0), 98, + true, false, false, true); +// public static UcdPropertyDetail Indic_Matra_Category_Detail = new UcdPropertyDetail ( +// UcdProperty.Indic_Matra_Category, VersionInfo.getInstance(6,0,0), VersionInfo.getInstance(7,0,0), 99, +// true, false, false, true); + public static final UcdPropertyDetail Indic_Positional_Category_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Positional_Category, VersionInfo.getInstance(8,0,0), 100, + true, false, false, true); + public static final UcdPropertyDetail kJa_Detail = new UcdPropertyDetail ( + UcdProperty.kJa, VersionInfo.getInstance(8,0,0), 101, + false, true, false, true); + public static final UcdPropertyDetail Prepended_Concatenation_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Prepended_Concatenation_Mark, VersionInfo.getInstance(9,0,0), 102, + true, false, false, true); + public static final UcdPropertyDetail Vertical_Orientation_Detail = new UcdPropertyDetail ( + UcdProperty.Vertical_Orientation, VersionInfo.getInstance(10,0,0), 103, + true, false, false, true); + public static final UcdPropertyDetail Regional_Indicator_Detail = new UcdPropertyDetail ( + UcdProperty.Regional_Indicator, VersionInfo.getInstance(10,0,0), 104, + true, false, false, true); + public static final UcdPropertyDetail Block_Detail = new UcdPropertyDetail ( + UcdProperty.Block, VersionInfo.getInstance(10,0,0), 105, + true, false, false, true); + public static final UcdPropertyDetail Equivalent_Unified_Ideograph_Detail = new UcdPropertyDetail ( + UcdProperty.Equivalent_Unified_Ideograph, VersionInfo.getInstance(11,0,0), 106, + false, true, false, true); + public static final UcdPropertyDetail kCompatibilityVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kCompatibilityVariant, VersionInfo.getInstance(11,0,0), 107, + false, true, true, true); + public static final UcdPropertyDetail kRSUnicode_Detail = new 
UcdPropertyDetail ( + UcdProperty.kRSUnicode, VersionInfo.getInstance(11,0,0), 108, + false, true, false, true); +// public static UcdPropertyDetail kIRG_RSIndex_Detail = new UcdPropertyDetail ( +// UcdProperty.kIRG_RSIndex, VersionInfo.getInstance(11,0,0), 109, +// false, true, false, true); + /* Details for the kIRG_*Source properties (G through UK); final so these shared instances cannot be reassigned. The disabled kIRG_RSIndex entry above keeps integer 109. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail kIRG_GSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_GSource, VersionInfo.getInstance(11,0,0), 110, + false, true, true, true); + public static final UcdPropertyDetail kIRG_TSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_TSource, VersionInfo.getInstance(11,0,0), 111, + false, true, true, true); + public static final UcdPropertyDetail kIRG_JSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_JSource, VersionInfo.getInstance(11,0,0), 112, + false, true, true, true); + public static final UcdPropertyDetail kIRG_KSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_KSource, VersionInfo.getInstance(11,0,0), 113, + false, true, true, true); + public static final UcdPropertyDetail kIRG_KPSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_KPSource, VersionInfo.getInstance(11,0,0), 114, + false, true, true, true); + public static final UcdPropertyDetail kIRG_VSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_VSource, VersionInfo.getInstance(11,0,0), 115, + false, true, true, true); + public static final UcdPropertyDetail kIRG_HSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_HSource, VersionInfo.getInstance(11,0,0), 116, + false, true, true, true); + public static final UcdPropertyDetail kIRG_USource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_USource, VersionInfo.getInstance(11,0,0), 117, + false, true, true, true); + public static final UcdPropertyDetail kIRG_MSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_MSource, VersionInfo.getInstance(11,0,0), 118, + false, true, true, true); + public static final UcdPropertyDetail kIRG_UKSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_UKSource, 
VersionInfo.getInstance(13,0,0), 119, + false, true, true, true); + /* Details for kIRG_SSource through kJis0; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail kIRG_SSource_Detail = new UcdPropertyDetail ( + UcdProperty.kIRG_SSource, VersionInfo.getInstance(13,0,0), 120, + false, true, true, true); + public static final UcdPropertyDetail kIICore_Detail = new UcdPropertyDetail ( + UcdProperty.kIICore, VersionInfo.getInstance(11,0,0), 121, + false, true, false, true); + public static final UcdPropertyDetail kUnihanCore2020_Detail = new UcdPropertyDetail ( + UcdProperty.kUnihanCore2020, VersionInfo.getInstance(11,0,0), 122, + false, true, false, true); + public static final UcdPropertyDetail kGB0_Detail = new UcdPropertyDetail ( + UcdProperty.kGB0, VersionInfo.getInstance(11,0,0), 123, + false, true, false, true); + public static final UcdPropertyDetail kGB1_Detail = new UcdPropertyDetail ( + UcdProperty.kGB1, VersionInfo.getInstance(11,0,0), 124, + false, true, false, true); + public static final UcdPropertyDetail kGB3_Detail = new UcdPropertyDetail ( + UcdProperty.kGB3, VersionInfo.getInstance(11,0,0), 125, + false, true, false, true); + public static final UcdPropertyDetail kGB5_Detail = new UcdPropertyDetail ( + UcdProperty.kGB5, VersionInfo.getInstance(11,0,0), 126, + false, true, false, true); + public static final UcdPropertyDetail kGB7_Detail = new UcdPropertyDetail ( + UcdProperty.kGB7, VersionInfo.getInstance(11,0,0), 127, + false, true, false, true); + public static final UcdPropertyDetail kGB8_Detail = new UcdPropertyDetail ( + UcdProperty.kGB8, VersionInfo.getInstance(11,0,0), 128, + false, true, false, true); + public static final UcdPropertyDetail kCNS1986_Detail = new UcdPropertyDetail ( + UcdProperty.kCNS1986, VersionInfo.getInstance(11,0,0), 129, + false, true, false, true); + public static final UcdPropertyDetail kCNS1992_Detail = new UcdPropertyDetail ( + UcdProperty.kCNS1992, VersionInfo.getInstance(11,0,0), 130, + false, true, false, true); + public static final UcdPropertyDetail kJis0_Detail = new UcdPropertyDetail ( + UcdProperty.kJis0, 
VersionInfo.getInstance(11,0,0), 131, + false, true, false, true); + /* Details for kJis1 through kHanYu; final so these shared instances cannot be reassigned. kKSC0, kKSC1, kKPS0, kKPS1 and kHKSCS pass a second VersionInfo (15.1.0). NOTE(review): presumably the last version carrying the property, and argument meanings in general, are defined by the UcdPropertyDetail constructors outside this span -- confirm there. */ public static final UcdPropertyDetail kJis1_Detail = new UcdPropertyDetail ( + UcdProperty.kJis1, VersionInfo.getInstance(11,0,0), 132, + false, true, false, true); + public static final UcdPropertyDetail kJIS0213_Detail = new UcdPropertyDetail ( + UcdProperty.kJIS0213, VersionInfo.getInstance(11,0,0), 133, + false, true, false, true); + public static final UcdPropertyDetail kKSC0_Detail = new UcdPropertyDetail ( + UcdProperty.kKSC0, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 134, + false, true, false, true); + public static final UcdPropertyDetail kKSC1_Detail = new UcdPropertyDetail ( + UcdProperty.kKSC1, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 135, + false, true, false, true); + public static final UcdPropertyDetail kKPS0_Detail = new UcdPropertyDetail ( + UcdProperty.kKPS0, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 136, + false, true, false, true); + public static final UcdPropertyDetail kKPS1_Detail = new UcdPropertyDetail ( + UcdProperty.kKPS1, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 137, + false, true, false, true); + public static final UcdPropertyDetail kHKSCS_Detail = new UcdPropertyDetail ( + UcdProperty.kHKSCS, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 138, + false, true, false, true); + public static final UcdPropertyDetail kCantonese_Detail = new UcdPropertyDetail ( + UcdProperty.kCantonese, VersionInfo.getInstance(11,0,0), 139, + false, true, false, true); + public static final UcdPropertyDetail kHangul_Detail = new UcdPropertyDetail ( + UcdProperty.kHangul, VersionInfo.getInstance(11,0,0), 140, + false, true, false, true); + public static final UcdPropertyDetail kDefinition_Detail = new UcdPropertyDetail ( + UcdProperty.kDefinition, VersionInfo.getInstance(11,0,0), 141, + false, true, false, true); + public static final UcdPropertyDetail kHanYu_Detail = new UcdPropertyDetail ( + UcdProperty.kHanYu, 
VersionInfo.getInstance(11,0,0), 142, + false, true, false, true); +// public static UcdPropertyDetail kAlternateHanYu_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateHanYu, VersionInfo.getInstance(11,0,0), 143, +// false, true, false, true); + /* Details for kMandarin through kFennIndex; final so these shared instances cannot be reassigned. The disabled kAlternateHanYu entry above keeps integer 143. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail kMandarin_Detail = new UcdPropertyDetail ( + UcdProperty.kMandarin, VersionInfo.getInstance(11,0,0), 144, + false, true, false, true); + public static final UcdPropertyDetail kCihaiT_Detail = new UcdPropertyDetail ( + UcdProperty.kCihaiT, VersionInfo.getInstance(11,0,0), 145, + false, true, false, true); + public static final UcdPropertyDetail kSBGY_Detail = new UcdPropertyDetail ( + UcdProperty.kSBGY, VersionInfo.getInstance(11,0,0), 146, + false, true, false, true); + public static final UcdPropertyDetail kNelson_Detail = new UcdPropertyDetail ( + UcdProperty.kNelson, VersionInfo.getInstance(11,0,0), 147, + false, true, false, true); + public static final UcdPropertyDetail kCowles_Detail = new UcdPropertyDetail ( + UcdProperty.kCowles, VersionInfo.getInstance(11,0,0), 148, + false, true, false, true); + public static final UcdPropertyDetail kMatthews_Detail = new UcdPropertyDetail ( + UcdProperty.kMatthews, VersionInfo.getInstance(11,0,0), 149, + false, true, false, true); + public static final UcdPropertyDetail kOtherNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kOtherNumeric, VersionInfo.getInstance(11,0,0), 150, + false, true, false, true); + public static final UcdPropertyDetail kPhonetic_Detail = new UcdPropertyDetail ( + UcdProperty.kPhonetic, VersionInfo.getInstance(11,0,0), 151, + false, true, false, true); + public static final UcdPropertyDetail kGSR_Detail = new UcdPropertyDetail ( + UcdProperty.kGSR, VersionInfo.getInstance(11,0,0), 152, + false, true, false, true); + public static final UcdPropertyDetail kFenn_Detail = new UcdPropertyDetail ( + UcdProperty.kFenn, VersionInfo.getInstance(11,0,0), 153, + false, true, false, true); + public static final UcdPropertyDetail kFennIndex_Detail = new UcdPropertyDetail ( + 
UcdProperty.kFennIndex, VersionInfo.getInstance(11,0,0), 154, + false, true, false, true); + /* Details for kKarlgren through kZVariant; final so these shared instances cannot be reassigned. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail kKarlgren_Detail = new UcdPropertyDetail ( + UcdProperty.kKarlgren, VersionInfo.getInstance(11,0,0), 155, + false, true, false, true); + public static final UcdPropertyDetail kCangjie_Detail = new UcdPropertyDetail ( + UcdProperty.kCangjie, VersionInfo.getInstance(11,0,0), 156, + false, true, false, true); + public static final UcdPropertyDetail kMeyerWempe_Detail = new UcdPropertyDetail ( + UcdProperty.kMeyerWempe, VersionInfo.getInstance(11,0,0), 157, + false, true, false, true); + public static final UcdPropertyDetail kSimplifiedVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSimplifiedVariant, VersionInfo.getInstance(11,0,0), 158, + false, true, false, true); + public static final UcdPropertyDetail kTraditionalVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kTraditionalVariant, VersionInfo.getInstance(11,0,0), 159, + false, true, false, true); + public static final UcdPropertyDetail kSpecializedSemanticVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSpecializedSemanticVariant, VersionInfo.getInstance(11,0,0), 160, + false, true, false, true); + public static final UcdPropertyDetail kSemanticVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSemanticVariant, VersionInfo.getInstance(11,0,0), 161, + false, true, false, true); + public static final UcdPropertyDetail kVietnamese_Detail = new UcdPropertyDetail ( + UcdProperty.kVietnamese, VersionInfo.getInstance(11,0,0), 162, + false, true, false, true); + public static final UcdPropertyDetail kLau_Detail = new UcdPropertyDetail ( + UcdProperty.kLau, VersionInfo.getInstance(11,0,0), 163, + false, true, false, true); + public static final UcdPropertyDetail kTang_Detail = new UcdPropertyDetail ( + UcdProperty.kTang, VersionInfo.getInstance(11,0,0), 164, + false, true, false, true); + public static final UcdPropertyDetail kZVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kZVariant, VersionInfo.getInstance(11,0,0), 165, + 
false, true, false, true); + /* Details for kJapaneseKun through kHKGlyph; final so these shared instances cannot be reassigned. The disabled kAlternateKangXi entry keeps integer 169; kFrequency passes a second VersionInfo (16.0.0). NOTE(review): argument meanings come from the UcdPropertyDetail constructors, which are outside this span. */ public static final UcdPropertyDetail kJapaneseKun_Detail = new UcdPropertyDetail ( + UcdProperty.kJapaneseKun, VersionInfo.getInstance(11,0,0), 166, + false, true, false, true); + public static final UcdPropertyDetail kJapaneseOn_Detail = new UcdPropertyDetail ( + UcdProperty.kJapaneseOn, VersionInfo.getInstance(11,0,0), 167, + false, true, false, true); + public static final UcdPropertyDetail kKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kKangXi, VersionInfo.getInstance(11,0,0), 168, + false, true, false, true); +// public static UcdPropertyDetail kAlternateKangXi_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateKangXi, VersionInfo.getInstance(11,0,0), 169, +// false, true, false, true); + public static final UcdPropertyDetail kBigFive_Detail = new UcdPropertyDetail ( + UcdProperty.kBigFive, VersionInfo.getInstance(11,0,0), 170, + false, true, false, true); + public static final UcdPropertyDetail kCCCII_Detail = new UcdPropertyDetail ( + UcdProperty.kCCCII, VersionInfo.getInstance(11,0,0), 171, + false, true, false, true); + public static final UcdPropertyDetail kDaeJaweon_Detail = new UcdPropertyDetail ( + UcdProperty.kDaeJaweon, VersionInfo.getInstance(11,0,0), 172, + false, true, false, true); + public static final UcdPropertyDetail kEACC_Detail = new UcdPropertyDetail ( + UcdProperty.kEACC, VersionInfo.getInstance(11,0,0), 173, + false, true, false, true); + public static final UcdPropertyDetail kFrequency_Detail = new UcdPropertyDetail ( + UcdProperty.kFrequency, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(16,0,0), 174, + false, true, false, true); + public static final UcdPropertyDetail kGradeLevel_Detail = new UcdPropertyDetail ( + UcdProperty.kGradeLevel, VersionInfo.getInstance(11,0,0), 175, + false, true, false, true); + public static final UcdPropertyDetail kHDZRadBreak_Detail = new UcdPropertyDetail ( + UcdProperty.kHDZRadBreak, VersionInfo.getInstance(11,0,0), 176, + false, true, false, true); + public static final UcdPropertyDetail kHKGlyph_Detail = 
new UcdPropertyDetail ( + UcdProperty.kHKGlyph, VersionInfo.getInstance(11,0,0), 177, + false, true, false, true); + /* Details for kHanyuPinlu through kPrimaryNumeric; final so these shared instances cannot be reassigned. kIRGDaiKanwaZiten passes a second VersionInfo (15.1.0); the disabled kAlternateMorohashi entry keeps integer 187. NOTE(review): argument meanings come from the UcdPropertyDetail constructors, which are outside this span. */ public static final UcdPropertyDetail kHanyuPinlu_Detail = new UcdPropertyDetail ( + UcdProperty.kHanyuPinlu, VersionInfo.getInstance(11,0,0), 178, + false, true, false, true); + public static final UcdPropertyDetail kHanyuPinyin_Detail = new UcdPropertyDetail ( + UcdProperty.kHanyuPinyin, VersionInfo.getInstance(11,0,0), 179, + false, true, false, true); + public static final UcdPropertyDetail kIRGHanyuDaZidian_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGHanyuDaZidian, VersionInfo.getInstance(11,0,0), 180, + false, true, false, true); + public static final UcdPropertyDetail kIRGKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGKangXi, VersionInfo.getInstance(11,0,0), 181, + false, true, false, true); + public static final UcdPropertyDetail kIRGDaeJaweon_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGDaeJaweon, VersionInfo.getInstance(11,0,0), 182, + false, true, false, true); + public static final UcdPropertyDetail kIRGDaiKanwaZiten_Detail = new UcdPropertyDetail ( + UcdProperty.kIRGDaiKanwaZiten, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 183, + false, true, false, true); + public static final UcdPropertyDetail kKorean_Detail = new UcdPropertyDetail ( + UcdProperty.kKorean, VersionInfo.getInstance(11,0,0), 184, + false, true, false, true); + public static final UcdPropertyDetail kMainlandTelegraph_Detail = new UcdPropertyDetail ( + UcdProperty.kMainlandTelegraph, VersionInfo.getInstance(11,0,0), 185, + false, true, false, true); + public static final UcdPropertyDetail kMorohashi_Detail = new UcdPropertyDetail ( + UcdProperty.kMorohashi, VersionInfo.getInstance(11,0,0), 186, + false, true, false, true); +// public static UcdPropertyDetail kAlternateMorohashi_Detail = new UcdPropertyDetail ( +// UcdProperty.kAlternateMorohashi, VersionInfo.getInstance(11,0,0), 187, +// false, true, false, true); + public static final UcdPropertyDetail kPrimaryNumeric_Detail = new 
UcdPropertyDetail ( + UcdProperty.kPrimaryNumeric, VersionInfo.getInstance(11,0,0), 188, + false, true, false, true); + /* Details for kTaiwanTelegraph through kJinmeiyoKanji; final so these shared instances cannot be reassigned. The disabled kWubi entry keeps integer 197. NOTE(review): argument meanings come from the UcdPropertyDetail constructor, which is outside this span. */ public static final UcdPropertyDetail kTaiwanTelegraph_Detail = new UcdPropertyDetail ( + UcdProperty.kTaiwanTelegraph, VersionInfo.getInstance(11,0,0), 189, + false, true, false, true); + public static final UcdPropertyDetail kXerox_Detail = new UcdPropertyDetail ( + UcdProperty.kXerox, VersionInfo.getInstance(11,0,0), 190, + false, true, false, true); + public static final UcdPropertyDetail kPseudoGB1_Detail = new UcdPropertyDetail ( + UcdProperty.kPseudoGB1, VersionInfo.getInstance(11,0,0), 191, + false, true, false, true); + public static final UcdPropertyDetail kIBMJapan_Detail = new UcdPropertyDetail ( + UcdProperty.kIBMJapan, VersionInfo.getInstance(11,0,0), 192, + false, true, false, true); + public static final UcdPropertyDetail kAccountingNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kAccountingNumeric, VersionInfo.getInstance(11,0,0), 193, + false, true, false, true); + public static final UcdPropertyDetail kCheungBauer_Detail = new UcdPropertyDetail ( + UcdProperty.kCheungBauer, VersionInfo.getInstance(11,0,0), 194, + false, true, false, true); + public static final UcdPropertyDetail kCheungBauerIndex_Detail = new UcdPropertyDetail ( + UcdProperty.kCheungBauerIndex, VersionInfo.getInstance(11,0,0), 195, + false, true, false, true); + public static final UcdPropertyDetail kFourCornerCode_Detail = new UcdPropertyDetail ( + UcdProperty.kFourCornerCode, VersionInfo.getInstance(11,0,0), 196, + false, true, false, true); +// public static UcdPropertyDetail kWubi_Detail = new UcdPropertyDetail ( +// UcdProperty.kWubi, VersionInfo.getInstance(11,0,0), 197, +// false, true, false, true); + public static final UcdPropertyDetail kXHC1983_Detail = new UcdPropertyDetail ( + UcdProperty.kXHC1983, VersionInfo.getInstance(11,0,0), 198, + false, true, false, true); + public static final UcdPropertyDetail kJinmeiyoKanji_Detail = new UcdPropertyDetail ( + UcdProperty.kJinmeiyoKanji, 
VersionInfo.getInstance(11,0,0), 199, + false, true, false, true); + /* Details for kJoyoKanji through kRSAdobe_Japan1_6; final so these shared instances cannot be reassigned. kRSKangXi passes a second VersionInfo (15.1.0). NOTE(review): argument meanings come from the UcdPropertyDetail constructors, which are outside this span. */ public static final UcdPropertyDetail kJoyoKanji_Detail = new UcdPropertyDetail ( + UcdProperty.kJoyoKanji, VersionInfo.getInstance(11,0,0), 200, + false, true, false, true); + public static final UcdPropertyDetail kKoreanEducationHanja_Detail = new UcdPropertyDetail ( + UcdProperty.kKoreanEducationHanja, VersionInfo.getInstance(11,0,0), 201, + false, true, false, true); + public static final UcdPropertyDetail kKoreanName_Detail = new UcdPropertyDetail ( + UcdProperty.kKoreanName, VersionInfo.getInstance(11,0,0), 202, + false, true, false, true); + public static final UcdPropertyDetail kTGH_Detail = new UcdPropertyDetail ( + UcdProperty.kTGH, VersionInfo.getInstance(11,0,0), 203, + false, true, false, true); + public static final UcdPropertyDetail kTGHZ2013_Detail = new UcdPropertyDetail ( + UcdProperty.kTGHZ2013, VersionInfo.getInstance(11,0,0), 204, + false, true, false, true); + public static final UcdPropertyDetail kSpoofingVariant_Detail = new UcdPropertyDetail ( + UcdProperty.kSpoofingVariant, VersionInfo.getInstance(11,0,0), 205, + false, true, false, true); + public static final UcdPropertyDetail kRSKanWa_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKanWa, VersionInfo.getInstance(11,0,0), 206, + false, true, false, true); + public static final UcdPropertyDetail kRSJapanese_Detail = new UcdPropertyDetail ( + UcdProperty.kRSJapanese, VersionInfo.getInstance(11,0,0), 207, + false, true, false, true); + public static final UcdPropertyDetail kRSKorean_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKorean, VersionInfo.getInstance(11,0,0), 208, + false, true, false, true); + public static final UcdPropertyDetail kRSKangXi_Detail = new UcdPropertyDetail ( + UcdProperty.kRSKangXi, VersionInfo.getInstance(11,0,0), + VersionInfo.getInstance(15,1,0), 209, + false, true, false, true); + public static final UcdPropertyDetail kRSAdobe_Japan1_6_Detail = new UcdPropertyDetail ( + UcdProperty.kRSAdobe_Japan1_6, VersionInfo.getInstance(11,0,0), 210, + false, true, 
false, true); + public static UcdPropertyDetail kTotalStrokes_Detail = new UcdPropertyDetail ( + UcdProperty.kTotalStrokes, VersionInfo.getInstance(11,0,0), 211, + false, true, false, true); + public static UcdPropertyDetail kRSTUnicode_Detail = new UcdPropertyDetail ( + UcdProperty.kRSTUnicode, VersionInfo.getInstance(9,0,0), 212, + false, true, false, true); + public static UcdPropertyDetail kTGT_MergedSrc_Detail = new UcdPropertyDetail ( + UcdProperty.kTGT_MergedSrc, VersionInfo.getInstance(9,0,0), 213, + false, true, false, true); + public static UcdPropertyDetail kSrc_NushuDuben_Detail = new UcdPropertyDetail ( + UcdProperty.kSrc_NushuDuben, VersionInfo.getInstance(10,0,0), 214, + false, true, false, true); + public static UcdPropertyDetail kReading_Detail = new UcdPropertyDetail ( + UcdProperty.kReading, VersionInfo.getInstance(10,0,0), 215, + false, true, false, true); + public static UcdPropertyDetail ISO_Comment_Detail = new UcdPropertyDetail ( + UcdProperty.ISO_Comment, VersionInfo.getInstance(11,0,0), 216, + true, false, false, true); + public static UcdPropertyDetail Unicode_1_Name_Detail = new UcdPropertyDetail ( + UcdProperty.Unicode_1_Name, VersionInfo.getInstance(11,0,0), 217, + true, false, false, true); + public static UcdPropertyDetail Name_Alias_Detail = new UcdPropertyDetail ( + UcdProperty.Name_Alias, VersionInfo.getInstance(11,0,0), 218, + false, false, false, true); + public static UcdPropertyDetail Emoji_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji, VersionInfo.getInstance(13,0,0), 219, + true, false, false, true); + public static UcdPropertyDetail Emoji_Presentation_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Presentation, VersionInfo.getInstance(13,0,0), 220, + true, false, false, true); + public static UcdPropertyDetail Emoji_Modifier_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Modifier, VersionInfo.getInstance(13,0,0), 221, + true, false, false, true); + public static UcdPropertyDetail 
Emoji_Modifier_Base_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Modifier_Base, VersionInfo.getInstance(13,0,0), 222, + true, false, false, true); + public static UcdPropertyDetail Emoji_Component_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_Component, VersionInfo.getInstance(13,0,0), 223, + true, false, false, true); + public static UcdPropertyDetail Extended_Pictographic_Detail = new UcdPropertyDetail ( + UcdProperty.Extended_Pictographic, VersionInfo.getInstance(13,0,0), 224, + true, false, false, true); + public static UcdPropertyDetail kStrange_Detail = new UcdPropertyDetail ( + UcdProperty.kStrange, VersionInfo.getInstance(14,0,0), 225, + false, true, false, true); + public static UcdPropertyDetail kAlternateTotalStrokes_Detail = new UcdPropertyDetail ( + UcdProperty.kAlternateTotalStrokes, VersionInfo.getInstance(15,0,0), 226, + false, true, false, true); + public static UcdPropertyDetail NFKC_Simple_Casefold_Detail = new UcdPropertyDetail ( + UcdProperty.NFKC_Simple_Casefold, VersionInfo.getInstance(15,1,0), 227, + true, false, false, true); + public static UcdPropertyDetail ID_Compat_Math_Start_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Compat_Math_Start, VersionInfo.getInstance(15,1,0), 228, + true, false, false, true); + public static UcdPropertyDetail ID_Compat_Math_Continue_Detail = new UcdPropertyDetail ( + UcdProperty.ID_Compat_Math_Continue, VersionInfo.getInstance(15,1,0), 229, + true, false, false, true); + public static UcdPropertyDetail IDS_Unary_Operator_Detail = new UcdPropertyDetail ( + UcdProperty.IDS_Unary_Operator, VersionInfo.getInstance(15,1,0), 230, + true, false, false, true); + public static UcdPropertyDetail kJapanese_Detail = new UcdPropertyDetail ( + UcdProperty.kJapanese, VersionInfo.getInstance(15,1,0), 231, + false, true, false, true); + public static UcdPropertyDetail kMojiJoho_Detail = new UcdPropertyDetail ( + UcdProperty.kMojiJoho, VersionInfo.getInstance(15,1,0), 232, + false, true, false, true); + 
public static UcdPropertyDetail kSMSZD2003Index_Detail = new UcdPropertyDetail ( + UcdProperty.kSMSZD2003Index, VersionInfo.getInstance(15,1,0), 233, + false, true, false, true); + public static UcdPropertyDetail kSMSZD2003Readings_Detail = new UcdPropertyDetail ( + UcdProperty.kSMSZD2003Readings, VersionInfo.getInstance(15,1,0), 234, + false, true, false, true); + public static UcdPropertyDetail kVietnameseNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kVietnameseNumeric, VersionInfo.getInstance(15,1,0), 235, + false, true, false, true); + public static UcdPropertyDetail kZhuangNumeric_Detail = new UcdPropertyDetail ( + UcdProperty.kZhuangNumeric, VersionInfo.getInstance(15,1,0), 236, + false, true, false, true); + public static UcdPropertyDetail Indic_Conjunct_Break_Detail = new UcdPropertyDetail ( + UcdProperty.Indic_Conjunct_Break, VersionInfo.getInstance(15,1,0), 237, + true, false, false, true); + public static UcdPropertyDetail Modifier_Combining_Mark_Detail = new UcdPropertyDetail ( + UcdProperty.Modifier_Combining_Mark, VersionInfo.getInstance(16,0,0), 238, + true, false, false, true); + public static UcdPropertyDetail kFanqie_Detail = new UcdPropertyDetail ( + UcdProperty.kFanqie, VersionInfo.getInstance(16,0,0), 239, + false, true, false, true); + public static UcdPropertyDetail kZhuang_Detail = new UcdPropertyDetail ( + UcdProperty.kZhuang, VersionInfo.getInstance(16,0,0), 240, + false, true, false, true); + public static UcdPropertyDetail Basic_Emoji_Detail = new UcdPropertyDetail ( + UcdProperty.Basic_Emoji, -1, + false, false, false, false); + public static UcdPropertyDetail CJK_Radical_Detail = new UcdPropertyDetail ( + UcdProperty.CJK_Radical, -2, + false, false, false, false); + public static UcdPropertyDetail Confusable_MA_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_MA, -3, + false, false, false, false); + public static UcdPropertyDetail Confusable_ML_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_ML, -4, + false, 
false, false, false); + public static UcdPropertyDetail Confusable_SA_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_SA, -5, + false, false, false, false); + public static UcdPropertyDetail Confusable_SL_Detail = new UcdPropertyDetail ( + UcdProperty.Confusable_SL, -6, + false, false, false, false); + public static UcdPropertyDetail Do_Not_Emit_Preferred_Detail = new UcdPropertyDetail ( + UcdProperty.Do_Not_Emit_Preferred, -7, + false, false, false, false); + public static UcdPropertyDetail Do_Not_Emit_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Do_Not_Emit_Type, -8, + false, false, false, false); + public static UcdPropertyDetail Emoji_DCM_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_DCM, VersionInfo.getInstance(6,0,0), -9, + false, false, false, false); + public static UcdPropertyDetail Emoji_KDDI_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_KDDI, VersionInfo.getInstance(6,0,0), -10, + false, false, false, false); + public static UcdPropertyDetail Emoji_SB_Detail = new UcdPropertyDetail ( + UcdProperty.Emoji_SB, VersionInfo.getInstance(6,0,0), -11, + false, false, false, false); + public static UcdPropertyDetail Identifier_Status_Detail = new UcdPropertyDetail ( + UcdProperty.Identifier_Status, VersionInfo.getInstance(9,0,0), -12, + false, false, false, false); + public static UcdPropertyDetail Identifier_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Identifier_Type, VersionInfo.getInstance(9,0,0), -13, + false, false, false, false); + public static UcdPropertyDetail Idn_2008_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_2008, -14, + false, false, false, false); + public static UcdPropertyDetail Idn_Mapping_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_Mapping, -15, + false, false, false, false); + public static UcdPropertyDetail Idn_Status_Detail = new UcdPropertyDetail ( + UcdProperty.Idn_Status, -16, + false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Detail = new UcdPropertyDetail 
( + UcdProperty.Named_Sequences, -17, + false, false, false, false); + public static UcdPropertyDetail Named_Sequences_Prov_Detail = new UcdPropertyDetail ( + UcdProperty.Named_Sequences_Prov, -18, + false, false, false, false); + public static UcdPropertyDetail Other_Joining_Type_Detail = new UcdPropertyDetail ( + UcdProperty.Other_Joining_Type, -19, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Flag_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Flag_Sequence, -20, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Keycap_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Keycap_Sequence, -21, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Modifier_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Modifier_Sequence, -22, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Tag_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Tag_Sequence, -23, + false, false, false, false); + public static UcdPropertyDetail RGI_Emoji_Zwj_Sequence_Detail = new UcdPropertyDetail ( + UcdProperty.RGI_Emoji_Zwj_Sequence, -24, + false, false, false, false); + public static UcdPropertyDetail Standardized_Variant_Detail = new UcdPropertyDetail ( + UcdProperty.Standardized_Variant, -25, + false, false, false, false); + + private UcdProperty ucdProperty; + private VersionInfo minVersion; + private VersionInfo maxVersion; + private int sortOrder; + private boolean isBaseAttribute; + private boolean isCJKAttribute; + private boolean isCJKShowIfEmpty; + private boolean isOrgUCDXMLAttribute; + + private UcdPropertyDetail( + UcdProperty ucdProperty, + VersionInfo minVersion, + int sortOrder, + boolean isBaseAttribute, + boolean isCJKAttribute, + boolean isCJKShowIfEmpty, + boolean isOrgUCDXMLAttribute) { + this ( + ucdProperty, minVersion, null, + sortOrder, isBaseAttribute, isCJKAttribute, isCJKShowIfEmpty, 
/**
 * Canonical constructor; the shorter overloads delegate here.
 *
 * <p>Besides storing its arguments, each new instance registers itself in the static
 * lookup sets: always in {@code allPropertyDetails}; in {@code basePropertyDetails} when
 * {@code isBaseAttribute} is set; in {@code cjkPropertyDetails} when {@code isCJKAttribute}
 * is set; and in {@code ucdxmlPropertyDetails} when either of those two flags is set.
 *
 * @param ucdProperty the property this detail describes
 * @param minVersion value reported by {@link #getMinVersion()}; may be {@code null}
 * @param maxVersion value reported by {@link #getMaxVersion()}; may be {@code null}
 * @param sortOrder ordering key stored on the instance
 * @param isBaseAttribute whether the instance joins the base and ucdxml sets
 * @param isCJKAttribute whether the instance joins the CJK and ucdxml sets
 * @param isCJKShowIfEmpty value reported by {@link #isCJKShowIfEmpty()}
 * @param isOrgUCDXMLAttribute value reported by {@link #isOrgUCDXMLAttribute()}
 */
private UcdPropertyDetail(
        UcdProperty ucdProperty,
        VersionInfo minVersion,
        VersionInfo maxVersion,
        int sortOrder,
        boolean isBaseAttribute,
        boolean isCJKAttribute,
        boolean isCJKShowIfEmpty,
        boolean isOrgUCDXMLAttribute) {
    // Populate every field first so the instance is fully formed before it is published.
    this.ucdProperty = ucdProperty;
    this.sortOrder = sortOrder;
    this.minVersion = minVersion;
    this.maxVersion = maxVersion;
    this.isBaseAttribute = isBaseAttribute;
    this.isCJKAttribute = isCJKAttribute;
    this.isCJKShowIfEmpty = isCJKShowIfEmpty;
    this.isOrgUCDXMLAttribute = isOrgUCDXMLAttribute;

    allPropertyDetails.add(this);
    if (isBaseAttribute) {
        basePropertyDetails.add(this);
    }
    if (isCJKAttribute) {
        cjkPropertyDetails.add(this);
    }
    // A property written to UCDXML is one that is a base attribute, a CJK attribute, or both.
    if (isBaseAttribute || isCJKAttribute) {
        ucdxmlPropertyDetails.add(this);
    }
}
a/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java new file mode 100644 index 0000000000..0773486ccf --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionComponent.java @@ -0,0 +1,28 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +public class UcdSectionComponent { + private final VersionInfo minVersion; + private final VersionInfo maxVersion; + private final UcdProperty ucdProperty; + + UcdSectionComponent(VersionInfo minVersion, VersionInfo maxVersion, UcdProperty ucdProperty) { + this.minVersion = minVersion; + this.maxVersion = maxVersion; + this.ucdProperty = ucdProperty; + } + + public VersionInfo getMinVersion() { + return this.minVersion; + } + + public VersionInfo getMaxVersion() { + return this.maxVersion; + } + + public UcdProperty getUcdProperty() { + return this.ucdProperty; + } +} diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java new file mode 100644 index 0000000000..24b9a35a6a --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdSectionDetail.java @@ -0,0 +1,153 @@ +package org.unicode.xml; + +import com.ibm.icu.util.VersionInfo; +import org.unicode.props.UcdProperty; + +import java.util.LinkedHashSet; +import java.util.Set; + +public class UcdSectionDetail { + + public enum UcdSection { + BLOCKS ("blocks", "block", VersionInfo.getInstance(1, 1, 0), null, Blocks_Detail, true, true), + CJKRADICALS ("cjk-radicals", "cjk-radical", VersionInfo.getInstance(1, 1, 0), null, CJKRadicals_Detail, false, false), + DONOTEMIT ("do-not-emit", "instead", VersionInfo.getInstance(16, 0, 0), null, DoNotEmit_Detail, false, false), + EMOJISOURCES ("emoji-sources", "emoji-source", VersionInfo.getInstance(1, 1, 0), null, EmojiSources_Detail, true, false), + NAMEDSEQUENCES ("named-sequences", 
"named-sequence", VersionInfo.getInstance(1, 1, 0), null, NamedSequences_Detail, false, false), + NORMALIZATIONCORRECTIONS ("normalization-corrections", "normalization-correction", VersionInfo.getInstance(1, 1, 0), null, NormalizationCorrections_Detail, true, false), + STANDARDIZEDVARIANTS ("standardized-variants", "standardized-variant", VersionInfo.getInstance(1, 1, 0), null, StandardizedVariants_Detail, true, false); + private final String tag; + private final String childTag; + private final VersionInfo minVersion; + private final VersionInfo maxVersion; + private final UcdSectionDetail ucdSectionDetail; + private final boolean parserWithRange; + private final boolean parserWithMissing; + + UcdSection( + String tag, + String childTag, + VersionInfo minVersion, + VersionInfo maxVersion, + UcdSectionDetail ucdSectionDetail, + boolean parserWithRange, + boolean parserWithMissing) { + this.tag = tag; + this.childTag = childTag; + this.minVersion = minVersion; + this.maxVersion = maxVersion; + this.ucdSectionDetail = ucdSectionDetail; + this.parserWithRange = parserWithRange; + this.parserWithMissing = parserWithMissing; + } + + public String toString() { + return tag; + } + public String getChildTag() { + return childTag; + } + public VersionInfo getMinVersion() { + return minVersion; + } + public VersionInfo getMaxVersion() { + return maxVersion; + } + public UcdSectionDetail getUcdSectionDetail() { + return ucdSectionDetail; + } + public boolean getParserWithRange() { return parserWithRange; } + public boolean getParserWithMissing() { return parserWithMissing; } + } + + public static UcdSectionDetail Blocks_Detail = new UcdSectionDetail( + UcdSection.BLOCKS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Block) + }, + 0); + public static UcdSectionDetail NamedSequences_Detail = new UcdSectionDetail( + UcdSection.NAMEDSEQUENCES, + new UcdSectionComponent[]{ + new UcdSectionComponent( + 
VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Named_Sequences) + }, + 1); + public static UcdSectionDetail NormalizationCorrections_Detail = new UcdSectionDetail( + UcdSection.NORMALIZATIONCORRECTIONS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.NC_Original) + }, + 2); + public static UcdSectionDetail StandardizedVariants_Detail = new UcdSectionDetail( + UcdSection.STANDARDIZEDVARIANTS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Standardized_Variant), + new UcdSectionComponent( + VersionInfo.getInstance(13, 1, 0), + null, + UcdProperty.emoji_variation_sequence) + }, + 3); + public static UcdSectionDetail CJKRadicals_Detail = new UcdSectionDetail( + UcdSection.CJKRADICALS, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.CJK_Radical) + }, + 4); + public static UcdSectionDetail EmojiSources_Detail = new UcdSectionDetail( + UcdSection.EMOJISOURCES, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Emoji_DCM) + }, + 5); + public static UcdSectionDetail DoNotEmit_Detail = new UcdSectionDetail( + UcdSection.DONOTEMIT, + new UcdSectionComponent[]{ + new UcdSectionComponent( + VersionInfo.getInstance(1, 1, 0), + null, + UcdProperty.Do_Not_Emit_Type) + }, + 6); + + private final UcdSection ucdSection; + private final UcdSectionComponent[] ucdSectionComponents; + private final int sortOrder; + + private UcdSectionDetail( + UcdSection ucdSection, + UcdSectionComponent[] ucdSectionComponents, + int sortOrder) { + this.ucdSection = ucdSection; + this.ucdSectionComponents = ucdSectionComponents; + this.sortOrder = sortOrder; + } + + public UcdSection getSection() { + return this.ucdSection; + } + public UcdSectionComponent[] getUcdSectionComponents() { + return this.ucdSectionComponents; + } + public int 
getSortOrder() { + return this.sortOrder; + } +} \ No newline at end of file diff --git a/unicodetools/src/main/java/org/unicode/xml/UcdXML.java b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java new file mode 100644 index 0000000000..c826b4f408 --- /dev/null +++ b/unicodetools/src/main/java/org/unicode/xml/UcdXML.java @@ -0,0 +1,362 @@ +package org.unicode.xml; + +import java.io.*; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.ibm.icu.util.VersionInfo; +import com.thaiopensource.resolver.Input; +import org.unicode.props.*; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.AttributesImpl; + +import javax.xml.transform.TransformerConfigurationException; + + +public class UcdXML { + + private static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0"; + + private enum OutputType { + STRICT, + COMPATIBLE + } + + private enum UCDXMLOUTPUTRANGE { + ALL, + NOUNIHAN, + UNIHAN; + } + + private enum UCDXMLOUTPUTTYPE { + FLAT, + GROUPED; + } + + private enum Range { + RESERVED ("reserved"), + SURROGATE ("surrogate"), + NONCHARACTER ("noncharacter"), + CHARACTER ("char"), + CJKUNIFIEDIDEOGRAPH ("char"), + NONRANGE ("nonrange"); + + private final String tag; + + Range(String tag) { + this.tag = tag; + } + + public String toString() { + return tag; + } + } + + public static void main(String[] args) throws Exception { + + VersionInfo ucdVersion = VersionInfo.getInstance(15, 1, 0); + File destinationFolder = new File( + "C:\\_git\\Unicode\\ucdxml\\data\\" + + getVersionString(ucdVersion, 3) + "\\xmltest\\"); + if(!destinationFolder.exists()) { + destinationFolder.mkdir(); + } + buildUcdXMLFile(ucdVersion, destinationFolder, UCDXMLOUTPUTRANGE.ALL, UCDXMLOUTPUTTYPE.FLAT); + + System.out.println("end"); + } + + private static void buildUcdXMLFile( + VersionInfo ucdVersion, File destinationFolder, UCDXMLOUTPUTRANGE outputRange, UCDXMLOUTPUTTYPE outputType) 
+ throws IOException, TransformerConfigurationException, SAXException { + int lowCodepoint = 0x0; + int highCodepoint = 0x10FFFF; + // Tangut + //int lowCodepoint = 0x17000; + //int highCodepoint = 0x1B2FB; + //0x10FFFF + + File tempFile = new File(destinationFolder, "temp.xml"); + String outputFilename = "ucd." + + outputRange.toString().toLowerCase() + "." + + outputType.toString().toLowerCase() + ".xml"; + File destinationFile = new File(destinationFolder, outputFilename); + + FileOutputStream fileOutputStream = new FileOutputStream(tempFile); + UCDXMLWriter writer = new UCDXMLWriter(fileOutputStream); + + IndexUnicodeProperties iup = IndexUnicodeProperties.make(ucdVersion); + AttributeResolver attributeResolver = new AttributeResolver(iup); + UCDDataResolver ucdDataResolver = new UCDDataResolver(iup, NAMESPACE, writer); + + writer.startFile(); + writer.startElement("ucd"); { + writer.startElement("description"); { + writer.addContent("Unicode " + getVersionString(ucdVersion, 3)); + writer.endElement("description"); + } + buildRepertoire(writer, attributeResolver, ucdVersion, lowCodepoint, highCodepoint, outputRange); + if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.BLOCKS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NAMEDSEQUENCES); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.NORMALIZATIONCORRECTIONS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.STANDARDIZEDVARIANTS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.CJKRADICALS); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.EMOJISOURCES); + ucdDataResolver.buildSection(UcdSectionDetail.UcdSection.DONOTEMIT); + } + writer.endElement("ucd"); + } + writer.endFile(); + fileOutputStream.close (); + cleanUcdXMLFile(tempFile, destinationFile); + tempFile.delete(); + } + + private static void cleanUcdXMLFile(File tempFile, File destinationFile) throws IOException { + //XALAN writes out 
characters outside the BMP as entities. + //Use this code to replace the entities with the correct characters. + //See: https://issues.apache.org/jira/browse/XALANJ-2595 + + FileInputStream fileInputStream = new FileInputStream(tempFile); + FileOutputStream fileOutputStream = new FileOutputStream(destinationFile); + + InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream, StandardCharsets.UTF_8); + OutputStreamWriter outputStreamWriter = new OutputStreamWriter(fileOutputStream, StandardCharsets.UTF_8); + + BufferedReader bufferedReader = new BufferedReader(inputStreamReader); + BufferedWriter bufferedWriter = new BufferedWriter(outputStreamWriter); + + String line; + while ((line = bufferedReader.readLine()) != null) { + Matcher matcher = Pattern.compile("&#([\\d]+);").matcher(line); + line = matcher.replaceAll(matchResult -> new String(Character.toChars(Integer.parseInt(matcher.group(1))))); + bufferedWriter.append(line); + bufferedWriter.newLine(); + } + bufferedWriter.flush(); + fileInputStream.close(); + fileOutputStream.close(); + } + + private static void buildRepertoire( + UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + int lowCodepoint, int highCodepoint, UCDXMLOUTPUTRANGE outputRange) + throws SAXException { + + writer.startElement("repertoire"); { + + + ArrayList range = new ArrayList<>(); + Range rangeType = Range.NONRANGE; + + for (int codepoint = lowCodepoint; codepoint <= highCodepoint; codepoint++) { + if (attributeResolver.isUnassignedCodepoint(codepoint) || + (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && attributeResolver.isUnifiedIdeograph(codepoint))) { + Range currentRangeType = getRangeType(attributeResolver, codepoint); + if (!range.isEmpty()){ + if (!currentRangeType.equals(rangeType) || attributeResolver.isDifferentRange(codepoint, codepoint - 1)) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + 
range.clear(); + } + } + range.add(codepoint); + rangeType = currentRangeType; + } + else { + if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + range.clear(); + rangeType = Range.NONRANGE; + } + buildChar(writer, attributeResolver, ucdVersion, codepoint, outputRange); + } + } + //Handle any range before the end of the repertoire element. + if (!range.isEmpty()) { + if (outputRange != UCDXMLOUTPUTRANGE.UNIHAN) { + buildRange(writer, attributeResolver, ucdVersion, range, rangeType); + } + } + writer.endElement("repertoire"); + } + } + + private static void buildChar( + UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, int codepoint, + UCDXMLOUTPUTRANGE outputRange) + throws SAXException { + + if(outputRange != UCDXMLOUTPUTRANGE.UNIHAN || attributeResolver.isUnihanAttributeRange(codepoint)) { + AttributesImpl at = getAttributes(ucdVersion, attributeResolver, codepoint, outputRange); + writer.startElement("char", at); { + HashMap nameAliases = attributeResolver.getNameAliases(codepoint); + if (null != nameAliases && !nameAliases.isEmpty()) { + for (String alias : nameAliases.keySet()) { + AttributesImpl nameAliasAt = new AttributesImpl(); + nameAliasAt.addAttribute( + NAMESPACE, "alias", "alias", "CDATA", alias); + nameAliasAt.addAttribute( + NAMESPACE, "type", "type", "CDATA", nameAliases.get(alias)); + writer.startElement("name-alias", nameAliasAt); { + writer.endElement("name-alias"); + } + } + } + writer.endElement("char"); + } + } + } + + private static void buildRange(UCDXMLWriter writer, AttributeResolver attributeResolver, VersionInfo ucdVersion, + ArrayList range, Range rangeType) + throws SAXException { + AttributesImpl at = getReservedAttributes(ucdVersion, attributeResolver, range); + writer.startElement(rangeType.tag, at); { + writer.endElement(rangeType.tag); + } + } + + private static Range getRangeType(AttributeResolver 
attributeResolver, int codepoint) { + String NChar = attributeResolver.getNChar(codepoint); + UcdPropertyValues.General_Category_Values gc = attributeResolver.getgc(codepoint); + + if (attributeResolver.isUnihanAttributeRange(codepoint)) { + return Range.CJKUNIFIEDIDEOGRAPH; + } + if (gc.equals(UcdPropertyValues.General_Category_Values.Surrogate)) { + return Range.SURROGATE; + } + if (gc.equals(UcdPropertyValues.General_Category_Values.Private_Use)) { + return Range.CHARACTER; + } + if (NChar.equals(UcdPropertyValues.Binary.Yes.getShortName())) { + return Range.NONCHARACTER; + } + return Range.RESERVED; + } + + private static AttributesImpl getAttributes( + VersionInfo version, AttributeResolver attributeResolver, int codepoint, UCDXMLOUTPUTRANGE outputRange) { + AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", attributeResolver.getHexString(codepoint)); + + for (UcdPropertyDetail propDetail : UcdPropertyDetail.ucdxmlValues()) { + UcdProperty prop = propDetail.getUcdProperty(); + if (version.compareTo(propDetail.getMinVersion()) >= 0 && + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) < 0)) + { + String attrValue = attributeResolver.getAttributeValue(prop, codepoint); + boolean isAttributeIncluded = getIsAttributeIncluded( + attrValue, + attributeResolver.isUnihanAttributeRange(codepoint), + propDetail, prop, + outputRange); + + if(isAttributeIncluded) { + String propName = prop.getShortName(); + if(propName.startsWith("cjk")) { + propName = propName.substring(2); + } + attributes.addAttribute( + NAMESPACE, + propName, + propName, + "CDATA", + attrValue + ); + } + } + } + return attributes; + } + + private static boolean getIsAttributeIncluded( + String attrValue, + boolean isUnihanAttributeRange, + UcdPropertyDetail propDetail, + UcdProperty prop, + UCDXMLOUTPUTRANGE outputRange) { + if (attrValue == null) { return false; } + if (isUnihanAttributeRange) { + if 
(outputRange == UCDXMLOUTPUTRANGE.UNIHAN) { + if (prop.equals(UcdProperty.Numeric_Type) && !attrValue.equals("None")) { + return true; + } + if (prop.equals(UcdProperty.Numeric_Value) && !attrValue.equals("NaN")) { + return true; + } + return propDetail.isCJKAttribute() && (propDetail.isCJKShowIfEmpty() || !attrValue.isEmpty()); + } + if (outputRange == UCDXMLOUTPUTRANGE.NOUNIHAN && propDetail.isCJKAttribute()) { + return false; + } + if (propDetail.isCJKShowIfEmpty()) { + return true; + } + } + if (propDetail.isBaseAttribute()) { + return true; + } + return !attrValue.isEmpty(); + } + + + private static AttributesImpl getReservedAttributes( + VersionInfo version, AttributeResolver attributeResolver, ArrayList range) { + AttributesImpl attributes = new AttributesImpl(); + + if (range.size() == 1) { + attributes.addAttribute( + NAMESPACE, "cp", "cp", "CDATA", + attributeResolver.getHexString(range.get(0))); + } + else { + attributes.addAttribute( + NAMESPACE, "first-cp", "first-cp", "CDATA", + attributeResolver.getHexString(range.get(0))); + attributes.addAttribute( + NAMESPACE, "last-cp", "last-cp", "CDATA", + attributeResolver.getHexString(range.get(range.size() - 1))); + } + for (UcdPropertyDetail propDetail : UcdPropertyDetail.baseValues()) { + UcdProperty prop = propDetail.getUcdProperty(); + if (version.compareTo(propDetail.getMinVersion()) >= 0 && + (propDetail.getMaxVersion() == null || version.compareTo(propDetail.getMaxVersion()) <= 0)) + { + String attrValue = attributeResolver.getAttributeValue(propDetail.getUcdProperty(), range.get(0)); + + attributes.addAttribute( + NAMESPACE, + prop.getShortName(), + prop.getShortName(), + "CDATA", + attrValue + ); + } + } + return attributes; + } + + private static String getVersionString(VersionInfo version, int maxDigits) { + if (maxDigits >= 1 && maxDigits <= 4) { + int[] digits = new int[]{version.getMajor(), version.getMinor(), version.getMilli(), version.getMicro()}; + StringBuilder verStr = new 
StringBuilder(7); + verStr.append(digits[0]); + for(int i = 1; i < maxDigits; ++i) { + verStr.append("."); + verStr.append(digits[i]); + } + return verStr.toString(); + } else { + throw new IllegalArgumentException("Invalid maxDigits range"); + } + } +} \ No newline at end of file