Skip to content

Commit

Permalink
Rebase
Browse files Browse the repository at this point in the history
  • Loading branch information
jowilco committed Jun 26, 2024
1 parent 0ba5996 commit 7764f6c
Show file tree
Hide file tree
Showing 6 changed files with 2,884 additions and 1,233 deletions.
184 changes: 114 additions & 70 deletions unicodetools/src/main/java/org/unicode/xml/AttributeResolver.java

Large diffs are not rendered by default.

122 changes: 56 additions & 66 deletions unicodetools/src/main/java/org/unicode/xml/UCDDataResolver.java
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
package org.unicode.xml;

import com.ibm.icu.util.VersionInfo;
import java.util.*;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.PropertyParsingInfo;
import org.unicode.props.UcdLineParser;
import org.unicode.props.UcdProperty;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.util.*;

public class UCDDataResolver {

private final IndexUnicodeProperties indexUnicodeProperties;
Expand All @@ -30,24 +28,31 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep
String childTag = ucdSection.getChildTag();
boolean parserWithRange = ucdSection.getParserWithRange();
boolean parserWithMissing = ucdSection.getParserWithMissing();
UcdSectionComponent[] ucdSectionComponents = ucdSection.getUcdSectionDetail().getUcdSectionComponents();
UcdSectionComponent[] ucdSectionComponents =
ucdSection.getUcdSectionDetail().getUcdSectionComponents();

if (isCompatibleVersion(minVersion, maxVersion)) {
writer.startElement(tag);
{
for (UcdSectionComponent ucdSectionComponent : ucdSectionComponents) {
if (isCompatibleVersion(ucdSectionComponent.getMinVersion(), ucdSectionComponent.getMaxVersion())) {
if (isCompatibleVersion(
ucdSectionComponent.getMinVersion(),
ucdSectionComponent.getMaxVersion())) {
final PropertyParsingInfo fileInfoEVS =
PropertyParsingInfo.getPropertyInfo(ucdSectionComponent.getUcdProperty());
String fullFilename = fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion());
UcdLineParser parser = new UcdLineParser(FileUtilities.in("", fullFilename));
PropertyParsingInfo.getPropertyInfo(
ucdSectionComponent.getUcdProperty());
String fullFilename =
fileInfoEVS.getFullFileName(indexUnicodeProperties.getUcdVersion());
UcdLineParser parser =
new UcdLineParser(FileUtilities.in("", fullFilename));
parser.withRange(parserWithRange);
parser.withMissing(parserWithMissing);
switch (ucdSection) {
case BLOCKS:
for (UcdLineParser.UcdLine line : parser) {
if (!line.getOriginalLine().startsWith("#")) {
AttributesImpl attributes = getBlockAttributes(namespace, line);
AttributesImpl attributes =
getBlockAttributes(namespace, line);
writer.startElement(childTag, attributes);
{
writer.endElement(childTag);
Expand All @@ -64,8 +69,9 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep
List<String> names = new ArrayList<>(namedSequences.keySet());
Collections.sort(names);
for (String name : names) {
AttributesImpl attributes = getNamedSequenceAttributes(namespace, name,
namedSequences);
AttributesImpl attributes =
getNamedSequenceAttributes(
namespace, name, namedSequences);
writer.startElement(childTag, attributes);
{
writer.endElement(childTag);
Expand All @@ -74,7 +80,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep
break;
default:
for (UcdLineParser.UcdLine line : parser) {
AttributesImpl attributes = getAttributes(ucdSection, namespace, line);
AttributesImpl attributes =
getAttributes(ucdSection, namespace, line);
writer.startElement(childTag, attributes);
{
writer.endElement(childTag);
Expand All @@ -88,8 +95,8 @@ public void buildSection(UcdSectionDetail.UcdSection ucdSection) throws SAXExcep
}
}

private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, String namespace,
UcdLineParser.UcdLine line) {
private AttributesImpl getAttributes(
UcdSectionDetail.UcdSection ucdSection, String namespace, UcdLineParser.UcdLine line) {
switch (ucdSection) {
case CJKRADICALS:
return getCJKRadicalAttributes(namespace, line);
Expand All @@ -102,100 +109,83 @@ private AttributesImpl getAttributes(UcdSectionDetail.UcdSection ucdSection, Str
case STANDARDIZEDVARIANTS:
return getSVAttributes(namespace, line);
default:
throw new IllegalArgumentException("getAttributes failed on an unexpected UcdSection");
throw new IllegalArgumentException(
"getAttributes failed on an unexpected UcdSection");
}
}

/**
 * Builds the XML attributes for one block entry from a parsed UCD line.
 *
 * <p>The first field of the line is split on {@code ..} into the first and last code point;
 * the second field is used as the block name. Each attribute is added exactly once.
 *
 * @param namespace namespace URI recorded on every attribute
 * @param line parsed line; field 0 is expected to look like {@code first..last}, field 1 is
 *     the name
 * @return attributes {@code first-cp}, {@code last-cp} and {@code name}
 */
private static AttributesImpl getBlockAttributes(String namespace, UcdLineParser.UcdLine line) {
    String[] parts = line.getParts();
    // String.split takes a regex, so the two literal dots have to be escaped.
    String[] range = parts[0].split("\\.\\.");
    AttributesImpl attributes = new AttributesImpl();
    attributes.addAttribute(namespace, "first-cp", "first-cp", "CDATA", range[0]);
    attributes.addAttribute(namespace, "last-cp", "last-cp", "CDATA", range[1]);
    attributes.addAttribute(namespace, "name", "name", "CDATA", parts[1]);
    return attributes;
}

private static AttributesImpl getCJKRadicalAttributes(String namespace, UcdLineParser.UcdLine line) {
private static AttributesImpl getCJKRadicalAttributes(
String namespace, UcdLineParser.UcdLine line) {
String[] parts = line.getParts();
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(
namespace, "number", "number", "CDATA", parts[0]);
attributes.addAttribute(
namespace, "radical", "radical", "CDATA", parts[1]);
attributes.addAttribute(
namespace, "ideograph", "ideograph", "CDATA", parts[2]);
attributes.addAttribute(namespace, "number", "number", "CDATA", parts[0]);
attributes.addAttribute(namespace, "radical", "radical", "CDATA", parts[1]);
attributes.addAttribute(namespace, "ideograph", "ideograph", "CDATA", parts[2]);
return attributes;
}

private static AttributesImpl getDoNotEmitAttributes(String namespace, UcdLineParser.UcdLine line) {
private static AttributesImpl getDoNotEmitAttributes(
String namespace, UcdLineParser.UcdLine line) {
String[] parts = line.getParts();
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(
namespace, "of", "of", "CDATA", parts[0]);
attributes.addAttribute(
namespace, "use", "use", "CDATA", parts[1]);
attributes.addAttribute(
namespace, "because", "because", "CDATA", parts[2]);
attributes.addAttribute(namespace, "of", "of", "CDATA", parts[0]);
attributes.addAttribute(namespace, "use", "use", "CDATA", parts[1]);
attributes.addAttribute(namespace, "because", "because", "CDATA", parts[2]);
return attributes;
}

private static AttributesImpl getEmojiSourceAttributes(String namespace, UcdLineParser.UcdLine line) {
private static AttributesImpl getEmojiSourceAttributes(
String namespace, UcdLineParser.UcdLine line) {
String[] parts = line.getParts();
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(
namespace, "unicode", "unicode", "CDATA", parts[0]);
attributes.addAttribute(
namespace, "docomo", "docomo", "CDATA", parts[1]);
attributes.addAttribute(
namespace, "kddi", "kddi", "CDATA", parts[2]);
attributes.addAttribute(
namespace, "softbank", "softbank", "CDATA", parts[3]);
attributes.addAttribute(namespace, "unicode", "unicode", "CDATA", parts[0]);
attributes.addAttribute(namespace, "docomo", "docomo", "CDATA", parts[1]);
attributes.addAttribute(namespace, "kddi", "kddi", "CDATA", parts[2]);
attributes.addAttribute(namespace, "softbank", "softbank", "CDATA", parts[3]);
return attributes;
}

private static AttributesImpl getNamedSequenceAttributes(String namespace, String name,
HashMap<String, String> namedSequences) {
private static AttributesImpl getNamedSequenceAttributes(
String namespace, String name, HashMap<String, String> namedSequences) {
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(
namespace, "name", "name", "CDATA", name);
attributes.addAttribute(
namespace, "cps", "cps", "CDATA", namedSequences.get(name));
attributes.addAttribute(namespace, "name", "name", "CDATA", name);
attributes.addAttribute(namespace, "cps", "cps", "CDATA", namedSequences.get(name));
return attributes;
}

/**
 * Builds the XML attributes {@code cp}, {@code old}, {@code new} and {@code version} from a
 * parsed UCD line.
 *
 * @param namespace namespace URI recorded on every attribute
 * @param line parsed line; fields 0 through 3 supply the values, in that order
 * @return the four attributes, each added once
 */
private static AttributesImpl getNCAttributes(String namespace, UcdLineParser.UcdLine line) {
    String[] parts = line.getParts();
    AttributesImpl attributes = new AttributesImpl();
    attributes.addAttribute(namespace, "cp", "cp", "CDATA", parts[0]);
    attributes.addAttribute(namespace, "old", "old", "CDATA", parts[1]);
    attributes.addAttribute(namespace, "new", "new", "CDATA", parts[2]);
    attributes.addAttribute(namespace, "version", "version", "CDATA", parts[3]);
    return attributes;
}

private static AttributesImpl getSVAttributes(String namespace, UcdLineParser.UcdLine line) {
String[] parts = line.getParts();
AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(namespace, "cps", "cps", "CDATA", parts[0]);
attributes.addAttribute(namespace, "desc", "desc", "CDATA", parts[1]);
attributes.addAttribute(
namespace, "cps", "cps", "CDATA", parts[0]);
attributes.addAttribute(
namespace, "desc", "desc", "CDATA", parts[1]);
attributes.addAttribute(
namespace, "when", "when", "CDATA",
parts[2] != null ? parts[2] : "");
namespace, "when", "when", "CDATA", parts[2] != null ? parts[2] : "");
return attributes;
}

/**
 * Tells whether the UCD version being processed lies within the given inclusive bounds.
 *
 * @param minVersion lowest accepted version (inclusive)
 * @param maxVersion highest accepted version (inclusive), or {@code null} for no upper bound
 * @return {@code true} if the current UCD version is at least {@code minVersion} and, when
 *     {@code maxVersion} is non-null, at most {@code maxVersion}
 */
private boolean isCompatibleVersion(VersionInfo minVersion, VersionInfo maxVersion) {
    return (indexUnicodeProperties.getUcdVersion().compareTo(minVersion) >= 0
            && (maxVersion == null
                    || indexUnicodeProperties.getUcdVersion().compareTo(maxVersion) <= 0));
}
}
57 changes: 26 additions & 31 deletions unicodetools/src/main/java/org/unicode/xml/UCDXMLWriter.java
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
package org.unicode.xml;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

import java.io.FileOutputStream;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import java.io.FileOutputStream;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

public class UCDXMLWriter {

/** Namespace URI passed with every element this writer starts or ends. */
public static final String NAMESPACE = "http://www.unicode.org/ns/2003/ucd/1.0";

private final TransformerHandler transformerHandler;

Expand All @@ -27,53 +24,51 @@ public TransformerHandler getTransformerHandler() {
/**
 * Creates a writer whose SAX events are serialized to the given stream.
 *
 * <p>The underlying transformer is configured for UTF-8, XML output method, indentation and
 * {@code standalone="yes"}.
 *
 * @param f stream that receives the serialized XML
 * @throws TransformerConfigurationException if the transformer handler cannot be created
 */
public UCDXMLWriter(FileOutputStream f) throws TransformerConfigurationException {
    TransformerFactory tfactory = TransformerFactory.newInstance();
    SAXTransformerFactory sfactory = (SAXTransformerFactory) tfactory;
    transformerHandler = sfactory.newTransformerHandler();
    Transformer transformer = transformerHandler.getTransformer();
    transformer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    transformer.setOutputProperty(OutputKeys.METHOD, "xml");
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.setOutputProperty(OutputKeys.STANDALONE, "yes");
    // The indent width is set under both the xslt and the xalan property names; presumably
    // different serializer implementations read different keys -- confirm before removing one.
    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "3");
    transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "3");
    transformerHandler.setResult(new StreamResult(f));
}

/**
 * Starts the document and writes the file header exactly once.
 *
 * <p>The header is, in order: a newline, a copyright comment, a newline, a terms-of-use
 * comment, and three newlines.
 *
 * @throws SAXException if the underlying handler rejects an event
 */
public void startFile() throws SAXException {
    transformerHandler.startDocument();
    char[] c = "\n".toCharArray();
    transformerHandler.characters(c, 0, c.length);
    // TODO: JRW change hardcoded 2023 to current year.
    c = " \u00A9 2023 Unicode\u00AE, Inc. ".toCharArray();
    transformerHandler.comment(c, 0, c.length);
    c = "\n".toCharArray();
    transformerHandler.characters(c, 0, c.length);
    c = " For terms of use, see http://www.unicode.org/terms_of_use.html ".toCharArray();
    transformerHandler.comment(c, 0, c.length);
    c = "\n\n\n".toCharArray();
    transformerHandler.characters(c, 0, c.length);
}

/**
 * Ends the document by sending a single end-of-document event to the handler.
 *
 * @throws SAXException if the underlying handler rejects the event
 */
public void endFile() throws SAXException {
    transformerHandler.endDocument();
}

/**
 * Starts an element that carries no attributes.
 *
 * @param tagName name of the element to start
 * @throws SAXException if the underlying handler rejects the event
 */
public void startElement(String tagName) throws SAXException {
    AttributesImpl attributes = new AttributesImpl();
    startElement(tagName, attributes);
}

/**
 * Starts an element in {@link #NAMESPACE} with the given attributes.
 *
 * @param tagName used as both the local name and the qualified name of the element
 * @param attributes attributes to attach to the element
 * @throws SAXException if the underlying handler rejects the event
 */
public void startElement(String tagName, AttributesImpl attributes) throws SAXException {
    transformerHandler.startElement(NAMESPACE, tagName, tagName, attributes);
}

/**
 * Sends the characters of {@code s} to the handler as one character-data event.
 *
 * @param s text to write
 * @throws SAXException if the underlying handler rejects the event
 */
public void addContent(String s) throws SAXException {
    char[] d = s.toCharArray();
    transformerHandler.characters(d, 0, d.length);
}

/**
 * Ends an element in {@link #NAMESPACE}.
 *
 * @param tagName used as both the local name and the qualified name of the element
 * @throws SAXException if the underlying handler rejects the event
 */
public void endElement(String tagName) throws SAXException {
    transformerHandler.endElement(NAMESPACE, tagName, tagName);
}
}


Loading

0 comments on commit 7764f6c

Please sign in to comment.