diff --git a/build.sbt b/build.sbt
index 257bc67f..3a0d6b68 100644
--- a/build.sbt
+++ b/build.sbt
@@ -113,6 +113,7 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
libraryDependencies += "junit" % "junit" % "4.13.2" % Test,
libraryDependencies += "com.github.sbt" % "junit-interface" % "0.13.3" % Test,
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % Test,
+ libraryDependencies += "xerces" % "xercesImpl" % "2.12.2" % Test,
libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match {
case Some((3, _)) =>
Seq()
diff --git a/jvm/src/test/resources/scala/xml/archive/books.xml b/jvm/src/test/resources/scala/xml/archive/books.xml
new file mode 100644
index 00000000..00a49111
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/archive/books.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/jvm/src/test/resources/scala/xml/archive/books/book/author.xml b/jvm/src/test/resources/scala/xml/archive/books/book/author.xml
new file mode 100644
index 00000000..3f2d2a9c
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/archive/books/book/author.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml b/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml
new file mode 100644
index 00000000..7577b32c
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/archive/books/book/author/volume/1.xml
@@ -0,0 +1 @@
+
diff --git a/jvm/src/test/resources/scala/xml/includee.xml b/jvm/src/test/resources/scala/xml/includee.xml
new file mode 100644
index 00000000..151eda26
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/includee.xml
@@ -0,0 +1,3 @@
+
+ Blah!
+
diff --git a/jvm/src/test/resources/scala/xml/includer.xml b/jvm/src/test/resources/scala/xml/includer.xml
new file mode 100644
index 00000000..52195478
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/includer.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/jvm/src/test/resources/scala/xml/site.xml b/jvm/src/test/resources/scala/xml/site.xml
new file mode 100644
index 00000000..9c77a297
--- /dev/null
+++ b/jvm/src/test/resources/scala/xml/site.xml
@@ -0,0 +1,3 @@
+
+
+
diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala
index 196085f4..90e4a65c 100644
--- a/jvm/src/test/scala/scala/xml/XMLTest.scala
+++ b/jvm/src/test/scala/scala/xml/XMLTest.scala
@@ -510,8 +510,15 @@ class XMLTestJVM {
}
}
+ // With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want:
+ def xercesInternal: javax.xml.parsers.SAXParserFactory =
+ javax.xml.parsers.SAXParserFactory.newInstance("com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl", null)
+
+ def xercesExternal: javax.xml.parsers.SAXParserFactory =
+ javax.xml.parsers.SAXParserFactory.newInstance("org.apache.xerces.jaxp.SAXParserFactoryImpl", null)
+
/** Default SAXParserFactory */
- val defaultParserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance
+ val defaultParserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal
@throws(classOf[org.xml.sax.SAXNotRecognizedException])
def issue17UnrecognizedFeature(): Unit = {
@@ -629,7 +636,7 @@ class XMLTestJVM {
// using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events;
// see https://github.com/scala/scala-xml/issues/506
def roundtrip(namespaceAware: Boolean, xml: String): Unit = {
- val parserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance()
+ val parserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal
parserFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
parserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true)
@@ -656,7 +663,7 @@ class XMLTestJVM {
@UnitTest
def useXMLReaderWithXMLFilter(): Unit = {
- val parent: org.xml.sax.XMLReader = javax.xml.parsers.SAXParserFactory.newInstance.newSAXParser.getXMLReader
+ val parent: org.xml.sax.XMLReader = xercesInternal.newSAXParser.getXMLReader
val filter: org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl(parent) {
override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
for (i <- 0 until length) if (ch(start+i) == 'a') ch(start+i) = 'b'
@@ -682,6 +689,67 @@ class XMLTestJVM {
assertTrue(gotAnError)
}
+ // Now that we can use an XML parser configured to be namespace-aware,
+ // we can also configure it to be XInclude-aware and process XML Includes:
+ def check(
+ parserFactory: javax.xml.parsers.SAXParserFactory,
+ resourceName: String,
+ expected: String
+ ): Unit = {
+ parserFactory.setNamespaceAware(true)
+ parserFactory.setXIncludeAware(true)
+ val actual: String = XML
+ .withSAXParser(parserFactory.newSAXParser)
+ .load(getClass.getResource(resourceName).toString)
+ .toString
+
+ assertEquals(expected, actual)
+ }
+
+ // Here we demonstrate that XInclude works with both the external and the built-in Xerces:
+
+ val includerExpected: String =
+ s"""
+ |
+ | Blah!
+ |
+ |""".stripMargin
+
+ @UnitTest def xIncludeWithExternalXerces(): Unit = check(xercesExternal, "includer.xml", includerExpected)
+ @UnitTest def xIncludeWithInternalXerces(): Unit = check(xercesInternal, "includer.xml", includerExpected)
+
+ // And here we demonstrate that both external and built-in Xerces report incorrect `xml:base`
+ // when the XML file included contains its own include, and included files are not in the same directory:
+ // `xml:base` on the `` element is incorrect
+ // books/book/author/volume/1.xml instead of the correct
+ // archive/books/book/author/volume/1.xml!
+ val siteUnfortunatelyExpected: String =
+ s"""
+ |
+ |
+ |
+ |
+ |
+ |""".stripMargin
+
+ // Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102:
+ // - the bug was reported in October 2005 - more than seventeen years ago;
+ // - a patch fixing it (that I have not verified personally) was submitted many years ago;
+ // - the bug is still not fixed in the 2023 release of Xerces;
+ // - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664,
+ // and is allegedly fixed in SaxonC 11.1 - although how this can be, given that Saxon does not ship its own Xerces, is not clear.
+ //
+ // In my own application, I had to "fix up" incorrect values produced by Xerces, taking into account
+ // the specific directory layout being used. I can only speculate what others do, but none of the alternatives sound great:
+ // - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug;
+ // - use a privately patched version of Xerces;
+ // - use Saxon DOM parsing instead of Xerces' SAX.
+ //
+ // I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
+ // has a bug in its core functionality for years and it never gets fixed, but sadly, it is the state of affairs:
+ @UnitTest def xIncludeFailWithExternalXerces(): Unit = check(xercesExternal, "site.xml", siteUnfortunatelyExpected)
+ @UnitTest def xIncludeFailWithInternalXerces(): Unit = check(xercesInternal, "site.xml", siteUnfortunatelyExpected)
+
@UnitTest
def nodeSeqNs(): Unit = {
val x: NodeBuffer = {