diff --git a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java index 32eadda6d49..f34082e2625 100644 --- a/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java +++ b/jackrabbit-core/src/test/java/org/apache/jackrabbit/core/xml/DocumentViewTest.java @@ -127,4 +127,41 @@ public void testMultiValue() throws Exception { } } + /** + * Test case for + * JCR-4935: + * session.exportDocumentView() generates unparsable XML if a JCR property contains an invalid XML character. The test adds a node holding such characters without saving it, exports it, calls refresh(false), re-imports the exported XML and expects every value to come back as its literal backslash-u escape. + */ + public void testInvalidXmlCharacter() throws Exception { + + Node root = superuser.getRootNode(); + + Node node = root.addNode("invalid-xml-character-test", "nt:unstructured"); + node.setProperty("0x3", "\u0003"); + node.setProperty("0xB", "\u000B"); + node.setProperty("0xC", "\u000C"); + node.setProperty("0x19", "\u0019"); + node.setProperty("0xD800", "\uD800"); + node.setProperty("0xFFFE", "\uFFFE"); + node.setProperty("0xFFFF", "\uFFFF"); + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + superuser.exportDocumentView("/invalid-xml-character-test", buffer, true, true); + superuser.refresh(false); + + superuser.importXML( + "/", new ByteArrayInputStream(buffer.toByteArray()), + ImportUUIDBehavior.IMPORT_UUID_COLLISION_THROW); + + node = root.getNode("invalid-xml-character-test"); + assertEquals("\\u0003", node.getProperty("0x3").getString()); + assertEquals("\\u000b", node.getProperty("0xB").getString()); + assertEquals("\\u000c", node.getProperty("0xC").getString()); + assertEquals("\\u0019", node.getProperty("0x19").getString()); + assertEquals("\\ud800", node.getProperty("0xD800").getString()); + assertEquals("\\ufffe", node.getProperty("0xFFFE").getString()); + assertEquals("\\uffff", node.getProperty("0xFFFF").getString()); + } + + } diff --git 
a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java index a053da27a33..7d9b7532fef 100644 --- a/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java +++ b/jackrabbit-jcr-commons/src/main/java/org/apache/jackrabbit/commons/xml/ToXmlContentHandler.java @@ -24,6 +24,8 @@ import java.io.Writer; import java.nio.charset.StandardCharsets; +import org.apache.jackrabbit.util.Text; +import org.apache.jackrabbit.util.XMLChar; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; @@ -140,7 +142,11 @@ private void write(char[] ch, int start, int length, boolean attribute) } else if (attribute && ch[i] == '\'') { writer.write("'"); } else { - writer.write(ch[i]); + if (XMLChar.isValid(ch[i])) { + writer.write(ch[i]); + } else { + writer.append(escapeIllegalXmlChar(ch[i])); + } } } catch (IOException e) { throw new SAXException( @@ -149,6 +155,22 @@ private void write(char[] ch, int start, int length, boolean attribute) } } + /** + * Escapes a character that is not valid in XML as a backslash, the letter u and the four hex digits (looked up in Text.hexTable, most significant first) of its 16-bit value, + * similar to FileVault. + * NOTE(review): write() tests every char on its own, so the two halves of a surrogate pair presumably both land here and get escaped - confirm that is intended. + * See https://jackrabbit.apache.org/filevault/docview.html#escaping + */ + private String escapeIllegalXmlChar(char c){ + StringBuilder buf = new StringBuilder(); + buf.append("\\u"); + buf.append(Text.hexTable[(c >> 12) & 15]); + buf.append(Text.hexTable[(c >> 8) & 15]); + buf.append(Text.hexTable[(c >> 4) & 15]); + buf.append(Text.hexTable[c & 15]); + return buf.toString(); + } + private void closeStartTagIfOpen() throws SAXException { if (startTagIsOpen) { try { @@ -275,5 +297,4 @@ public void endElement( public String toString() { return writer.toString(); } - }