-
-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
2,143 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,7 @@ perf-*.xml | |
test-*.xml | ||
|
||
# Directories | ||
.idea/ | ||
.settings/ | ||
.vs/ | ||
.vscode/ | ||
|
404 changes: 404 additions & 0 deletions
404
unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java
Large diffs are not rendered by default.
Oops, something went wrong.
194 changes: 194 additions & 0 deletions
194
unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
package org.unicode.xml; | ||
|
||
import com.ibm.icu.util.VersionInfo; | ||
import org.unicode.cldr.draft.FileUtilities; | ||
import org.unicode.props.IndexUnicodeProperties; | ||
import org.unicode.props.PropertyParsingInfo; | ||
import org.unicode.props.UcdLineParser; | ||
import org.unicode.props.UcdProperty; | ||
import org.xml.sax.SAXException; | ||
import org.xml.sax.helpers.AttributesImpl; | ||
|
||
import java.util.*; | ||
|
||
public class UCDDataResolver { | ||
|
||
private final IndexUnicodeProperties indexUnicodeProperties; | ||
private final String namespace; | ||
private final UCDXMLWriter writer; | ||
|
||
public UCDDataResolver(IndexUnicodeProperties iup, String namespace, UCDXMLWriter writer) { | ||
indexUnicodeProperties = iup; | ||
this.namespace = namespace; | ||
this.writer = writer; | ||
} | ||
|
||
public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXException { | ||
VersionInfo minVersion = ucdSection.getMinVersion(); | ||
VersionInfo maxVersion = ucdSection.getMaxVersion(); | ||
String tag = ucdSection.toString(); | ||
String childTag = ucdSection.getChildTag(); | ||
boolean parserWithRange = ucdSection.getParserWithRange(); | ||
boolean parserWithMissing = ucdSection.getParserWithMissing(); | ||
UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents(); | ||
|
||
if (isCompatibleVersion(minVersion, maxVersion)) { | ||
writer.startElement(tag); { | ||
for (UcdSectionComponent ucdSectionComponent : ucdSectionComponents) { | ||
if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) { | ||
final PropertyParsingInfo fileInfoEVS = PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty()); | ||
String fullFilename = fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion()); | ||
UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename)); | ||
parser.withRange(parserWithRange); | ||
parser.withMissing(parserWithMissing); | ||
switch (ucdSection) { | ||
case BLOCKS: | ||
for (UcdLineParser.UcdLine line : parser) { | ||
if (!line.getOriginalLine().startsWith("#")) { | ||
AttributesImpl attributes = getBlockAttributes(namespace, line); | ||
writer.startElement(childTag, attributes); { | ||
writer.endElement(childTag); | ||
} | ||
} | ||
} | ||
break; | ||
case NAMEDSEQUENCES: | ||
HashMap<String, String> namedSequences = new HashMap<>(); | ||
for (UcdLineParser.UcdLine line : parser) { | ||
String[] parts = line.getParts(); | ||
namedSequences.put(parts[0], parts[1]); | ||
} | ||
List<String> names = new ArrayList<>(namedSequences.keySet()); | ||
Collections.sort(names); | ||
for (String name : names) { | ||
AttributesImpl attributes = getNamedSequenceAttributes(namespace, name, namedSequences); | ||
writer.startElement(childTag, attributes); { | ||
writer.endElement(childTag); | ||
} | ||
} | ||
break; | ||
default: | ||
for (UcdLineParser.UcdLine line : parser) { | ||
AttributesImpl attributes = getAttributes(ucdSection, namespace, line); | ||
writer.startElement(childTag, attributes); | ||
{ | ||
writer.endElement(childTag); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
writer.endElement(tag); | ||
} | ||
} | ||
} | ||
|
||
private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) { | ||
switch(ucdSection) { | ||
case CJKRADICALS: | ||
return getCJKRadicalAttributes(namespace, line); | ||
case DONOTEMIT: | ||
return getDoNotEmitAttributes(namespace, line); | ||
case EMOJISOURCES: | ||
return getEmojiSourceAttributes(namespace, line); | ||
case NORMALIZATIONCORRECTIONS: | ||
return getNCAttributes(namespace, line); | ||
case STANDARDIZEDVARIANTS: | ||
return getSVAttributes(namespace, line); | ||
default: | ||
throw new IllegalArgumentException("getAttributes failed on an unexpected UcdSection"); | ||
} | ||
} | ||
|
||
private static AttributesImpl getBlockAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
String[] range = parts[0].split("\\.\\."); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "first-cp", "first-cp", "CDATA", range[0]); | ||
attributes.addAttribute( | ||
namespace, "last-cp", "last-cp", "CDATA", range[1]); | ||
attributes.addAttribute( | ||
namespace, "name", "name", "CDATA", parts[1]); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getCJKRadicalAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "number", "number", "CDATA", parts[0]); | ||
attributes.addAttribute( | ||
namespace, "radical", "radical", "CDATA", parts[1]); | ||
attributes.addAttribute( | ||
namespace, "ideograph", "ideograph", "CDATA", parts[2]); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getDoNotEmitAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "of", "of", "CDATA", parts[0]); | ||
attributes.addAttribute( | ||
namespace, "use", "use", "CDATA", parts[1]); | ||
attributes.addAttribute( | ||
namespace, "because", "because", "CDATA", parts[2]); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "unicode", "unicode", "CDATA", parts[0]); | ||
attributes.addAttribute( | ||
namespace, "docomo", "docomo", "CDATA", parts[1]); | ||
attributes.addAttribute( | ||
namespace, "kddi", "kddi", "CDATA", parts[2]); | ||
attributes.addAttribute( | ||
namespace, "softbank", "softbank", "CDATA", parts[3]); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getNamedSequenceAttributes(String namespace, String name, HashMap<String, String> namedSequences) { | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "name", "name", "CDATA", name); | ||
attributes.addAttribute( | ||
namespace, "cps", "cps", "CDATA", namedSequences.get(name)); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getNCAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "cp", "cp", "CDATA", parts[0]); | ||
attributes.addAttribute( | ||
namespace, "old", "old", "CDATA", parts[1]); | ||
attributes.addAttribute( | ||
namespace, "new", "new", "CDATA", parts[2]); | ||
attributes.addAttribute( | ||
namespace, "version", "version", "CDATA", parts[3]); | ||
return attributes; | ||
} | ||
|
||
private static AttributesImpl getSVAttributes(String namespace, UcdLineParser.UcdLine line) { | ||
String[] parts = line.getParts(); | ||
AttributesImpl attributes = new AttributesImpl(); | ||
attributes.addAttribute( | ||
namespace, "cps", "cps", "CDATA", parts[0]); | ||
attributes.addAttribute( | ||
namespace, "desc", "desc", "CDATA", parts[1]); | ||
attributes.addAttribute( | ||
namespace, "when", "when", "CDATA", | ||
parts[2] != null ? parts[2] : ""); | ||
return attributes; | ||
} | ||
|
||
private boolean isCompatibleVersion(VersionInfo minVersion, VersionInfo maxVersion) { | ||
return (indexUnicodeProperties.getUcdVersion().compareTo(minVersion) >= 0 && ( | ||
maxVersion == null || indexUnicodeProperties.getUcdVersion().compareTo(maxVersion) <= 0)); | ||
} | ||
} |
78 changes: 78 additions & 0 deletions
78
unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package org.unicode.xml; | ||
|
||
import org.xml.sax.Attributes; | ||
import org.xml.sax.SAXException; | ||
import org.xml.sax.helpers.AttributesImpl; | ||
|
||
import javax.xml.transform.OutputKeys; | ||
import javax.xml.transform.Transformer; | ||
import javax.xml.transform.TransformerConfigurationException; | ||
import javax.xml.transform.TransformerFactory; | ||
import javax.xml.transform.sax.SAXTransformerFactory; | ||
import javax.xml.transform.sax.TransformerHandler; | ||
import javax.xml.transform.stream.StreamResult; | ||
import java.io.FileOutputStream; | ||
|
||
/**
 * Small convenience wrapper around a JAXP {@link TransformerHandler} that serializes SAX events
 * as indented UTF-8 XML.
 *
 * <p>Every element is written in {@link #NAMESPACE}, using the tag name as both local and
 * qualified name. The output stream is supplied by the caller and is not closed by this class.
 */
public class UCDXMLWriter {

    public static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0";

    private final TransformerHandler transformerHandler;

    /** Returns the underlying handler, for events this class has no shortcut for. */
    public TransformerHandler getTransformerHandler() {
        return transformerHandler;
    }

    /**
     * Creates a writer that serializes to the given file stream.
     *
     * @param f destination stream; must not be {@code null}
     * @throws TransformerConfigurationException if no SAX-capable transformer is available
     */
    public UCDXMLWriter(FileOutputStream f) throws TransformerConfigurationException {
        this((java.io.OutputStream) f);
    }

    /**
     * Creates a writer that serializes to an arbitrary output stream.
     *
     * @param out destination stream; must not be {@code null}
     * @throws TransformerConfigurationException if the platform's {@link TransformerFactory}
     *     does not support {@link SAXTransformerFactory}, or the handler cannot be created
     */
    public UCDXMLWriter(java.io.OutputStream out) throws TransformerConfigurationException {
        if (out == null) {
            throw new NullPointerException("out");
        }
        final TransformerFactory factory = TransformerFactory.newInstance();
        // Check the feature before casting so that an unsupported factory is reported through
        // the declared exception instead of a ClassCastException.
        if (!factory.getFeature(SAXTransformerFactory.FEATURE)) {
            throw new TransformerConfigurationException(
                    "TransformerFactory "
                            + factory.getClass().getName()
                            + " does not support SAXTransformerFactory");
        }
        transformerHandler = ((SAXTransformerFactory) factory).newTransformerHandler();

        final Transformer transformer = transformerHandler.getTransformer();
        transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
        // The indent width is an implementation-specific property; both spellings are set.
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
        transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "3");

        transformerHandler.setResult(new StreamResult(out));
    }

    /**
     * Starts the document and writes the copyright and terms-of-use comments.
     *
     * @throws SAXException if the handler rejects an event
     */
    public void startFile() throws SAXException {
        transformerHandler.startDocument();
        writeText("\n");
        writeComment(" \u00A9 2023 Unicode\u00AE, Inc. ");
        writeText("\n");
        writeComment(" For terms of use, see http://www.unicode.org/terms_of_use.html ");
        writeText("\n\n\n");
    }

    /**
     * Ends the document.
     *
     * @throws SAXException if the handler rejects the event
     */
    public void endFile() throws SAXException {
        transformerHandler.endDocument();
    }

    /**
     * Opens an element without attributes.
     *
     * @param tagName local and qualified name of the element
     * @throws SAXException if the handler rejects the event
     */
    public void startElement(String tagName) throws SAXException {
        startElement(tagName, new AttributesImpl());
    }

    /**
     * Opens an element in {@link #NAMESPACE} with the given attributes.
     *
     * @param tagName local and qualified name of the element
     * @param attributes attributes of the element
     * @throws SAXException if the handler rejects the event
     */
    public void startElement(String tagName, AttributesImpl attributes) throws SAXException {
        transformerHandler.startElement(NAMESPACE, tagName, tagName, attributes);
    }

    /**
     * Writes character content into the currently open element.
     *
     * @param s text to write
     * @throws SAXException if the handler rejects the event
     */
    public void addContent(String s) throws SAXException {
        writeText(s);
    }

    /**
     * Closes an element previously opened with the same tag name.
     *
     * @param tagName local and qualified name of the element
     * @throws SAXException if the handler rejects the event
     */
    public void endElement(String tagName) throws SAXException {
        transformerHandler.endElement(NAMESPACE, tagName, tagName);
    }

    /** Sends {@code text} to the handler as a single characters event. */
    private void writeText(String text) throws SAXException {
        final char[] chars = text.toCharArray();
        transformerHandler.characters(chars, 0, chars.length);
    }

    /** Sends {@code text} to the handler as a single comment event. */
    private void writeComment(String text) throws SAXException {
        final char[] chars = text.toCharArray();
        transformerHandler.comment(chars, 0, chars.length);
    }
}
|
||
|
Oops, something went wrong.