From 772347431022120704153606883b9b1abcf489f1 Mon Sep 17 00:00:00 2001 From: Egor Kozlov Date: Mon, 29 May 2023 15:29:19 +0300 Subject: [PATCH 1/2] JCR-4935 skip invalid xml characters in session.exportDocumentView() --- .../jackrabbit/core/xml/DocumentViewTest.java | 37 +++++++++++++++++++ .../commons/xml/ToXmlContentHandler.java | 4 +- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java index 32eadda6d49..c57eb185664 100644 --- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java +++ b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java @@ -127,4 +127,41 @@ public void testMultiValue() throws Exception { } } + /** + * Test case for + * JCR-4935: + * session.exportDocumentView() generates unparsable XML if a JCR Property contains an invalid XML character + */ + public void testInvalidXmlCharacter() throws Exception { + + Node root = superuser.getRootNode(); + + Node node = root.addNode("invalid-xml-character-test", "nt:unstructured"); + node.setProperty("0x3", "\u0003"); + node.setProperty("0xB", "\u000B"); + node.setProperty("0xC", "\u000C"); + node.setProperty("0x19", "\u0019"); + node.setProperty("0xD800", "\uD800"); + node.setProperty("0xFFFE", "\uFFFE"); + node.setProperty("0xD800", "\uD800"); + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + superuser.exportDocumentView("/invalid-xml-character-test", buffer, true, true); + superuser.refresh(false); + + superuser.importXML( + "/", new ByteArrayInputStream(buffer.toByteArray()), + ImportUUIDBehavior.IMPORT_UUID_COLLISION_THROW); + + node = root.getNode("invalid-xml-character-test"); + assertEquals("", node.getProperty("0x3").getString()); + assertEquals("", node.getProperty("0xB").getString()); + assertEquals("", node.getProperty("0xC").getString()); + 
assertEquals("", node.getProperty("0x19").getString()); + assertEquals("", node.getProperty("0xD800").getString()); + assertEquals("", node.getProperty("0xFFFE").getString()); + assertEquals("", node.getProperty("0xD800").getString()); + } + + } diff --git a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java index a053da27a33..35615d50ea4 100644 --- a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java +++ b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java @@ -24,6 +24,7 @@ import java.io.Writer; import java.nio.charset.StandardCharsets; +import org.apache.jackrabbit.util.XMLChar; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -139,7 +140,7 @@ private void write(char[] ch, int start, int length, boolean attribute) writer.write("&quot;"); } else if (attribute && ch[i] == '\'') { writer.write("&apos;"); - } else { + } else if (XMLChar.isValid(ch[i])){ writer.write(ch[i]); } } catch (IOException e) { @@ -275,5 +276,4 @@ public void endElement( public String toString() { return writer.toString(); } - } From b0e3cfad5bc248763a5e36dde78d4e4111dee5ad Mon Sep 17 00:00:00 2001 From: Egor Kozlov Date: Wed, 31 May 2023 15:40:32 +0300 Subject: [PATCH 2/2] JCR-4935 convert illegal xml characters to unicode code points --- .../jackrabbit/core/xml/DocumentViewTest.java | 14 +++++------ .../commons/xml/ToXmlContentHandler.java | 25 +++++++++++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java index c57eb185664..f34082e2625 100644 --- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java +++ 
b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java @@ -154,13 +154,13 @@ public void testInvalidXmlCharacter() throws Exception { ImportUUIDBehavior.IMPORT_UUID_COLLISION_THROW); node = root.getNode("invalid-xml-character-test"); - assertEquals("", node.getProperty("0x3").getString()); - assertEquals("", node.getProperty("0xB").getString()); - assertEquals("", node.getProperty("0xC").getString()); - assertEquals("", node.getProperty("0x19").getString()); - assertEquals("", node.getProperty("0xD800").getString()); - assertEquals("", node.getProperty("0xFFFE").getString()); - assertEquals("", node.getProperty("0xD800").getString()); + assertEquals("\\u0003", node.getProperty("0x3").getString()); + assertEquals("\\u000b", node.getProperty("0xB").getString()); + assertEquals("\\u000c", node.getProperty("0xC").getString()); + assertEquals("\\u0019", node.getProperty("0x19").getString()); + assertEquals("\\ud800", node.getProperty("0xD800").getString()); + assertEquals("\\ufffe", node.getProperty("0xFFFE").getString()); + assertEquals("\\ud800", node.getProperty("0xD800").getString()); } diff --git a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java index 35615d50ea4..7d9b7532fef 100644 --- a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java +++ b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java @@ -24,6 +24,7 @@ import java.io.Writer; import java.nio.charset.StandardCharsets; +import org.apache.jackrabbit.util.Text; import org.apache.jackrabbit.util.XMLChar; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; @@ -140,8 +141,12 @@ private void write(char[] ch, int start, int length, boolean attribute) writer.write("&quot;"); } else if (attribute && ch[i] == '\'') { writer.write("&apos;"); - } else if 
(XMLChar.isValid(ch[i])){ - writer.write(ch[i]); + } else { + if (XMLChar.isValid(ch[i])) { + writer.write(ch[i]); + } else { + writer.append(escapeIllegalXmlChar(ch[i])); + } } } catch (IOException e) { throw new SAXException( @@ -150,6 +155,22 @@ private void write(char[] ch, int start, int length, boolean attribute) } } + /** + * Escapes invalid XML characters as Unicode code point escapes, + * similar to FileVault. + * + * See https://jackrabbit.apache.org/filevault/docview.html#escaping + */ + private String escapeIllegalXmlChar(char c){ + StringBuilder buf = new StringBuilder(); + buf.append("\\u"); + buf.append(Text.hexTable[(c >> 12) & 15]); + buf.append(Text.hexTable[(c >> 8) & 15]); + buf.append(Text.hexTable[(c >> 4) & 15]); + buf.append(Text.hexTable[c & 15]); + return buf.toString(); + } + private void closeStartTagIfOpen() throws SAXException { if (startTagIsOpen) { try {