From b05445fe6890bb54997c17a7c5fe24a5ad7818a7 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Tue, 27 Sep 2022 12:59:41 +0100 Subject: [PATCH 1/6] HADOOP-18469: centralise XML parser creation in XMLUtils undo hdfs and yarn changes remove mapreduce changes add tests secure transformer factory indent issue some review items --- .../org/apache/hadoop/conf/Configuration.java | 11 +- .../apache/hadoop/util/HostsFileReader.java | 2 +- .../java/org/apache/hadoop/util/XMLUtils.java | 96 +++++++++++++- .../org/apache/hadoop/cli/CLITestHelper.java | 5 +- .../apache/hadoop/conf/TestConfServlet.java | 4 +- .../org/apache/hadoop/util/TestXMLUtils.java | 117 ++++++++++++++++++ .../src/test/resources/xml/external-dtd.xml | 7 ++ .../tools/rumen/JobConfigurationParser.java | 3 +- .../hadoop/tools/rumen/ParsedConfigFile.java | 15 +-- 9 files changed, 239 insertions(+), 21 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java create mode 100644 hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 0a1e8868e3ae7..ab7ff0bd40cc2 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -24,7 +24,6 @@ import com.ctc.wstx.stax.WstxInputFactory; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; -import org.apache.hadoop.classification.VisibleForTesting; import java.io.BufferedInputStream; import java.io.DataInput; @@ -87,6 +86,7 @@ import org.apache.commons.collections.map.UnmodifiableMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -98,18 +98,19 @@ import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProvider.CredentialEntry; import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.XMLUtils; + import org.codehaus.stax2.XMLStreamReader2; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; -import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.thirdparty.com.google.common.base.Strings; - import static org.apache.commons.lang3.StringUtils.isBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank; @@ -3604,7 +3605,7 @@ public void writeXml(@Nullable String propertyName, Writer out, Configuration co try { DOMSource source = new DOMSource(doc); StreamResult result = new StreamResult(out); - TransformerFactory transFactory = TransformerFactory.newInstance(); + TransformerFactory transFactory = XMLUtils.newSecureTransformerFactory(); Transformer transformer = transFactory.newTransformer(); // Important to not hold Configuration log while writing result, since diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java index d94668356e261..300f8145c3162 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/HostsFileReader.java @@ -147,8 +147,8 @@ public static void readXmlFileToMapWithFileInputStream(String type, String filename, InputStream fileInputStream, Map map) throws IOException { Document dom; - DocumentBuilderFactory builder = DocumentBuilderFactory.newInstance(); try { + DocumentBuilderFactory builder = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = builder.newDocumentBuilder(); dom = db.parse(fileInputStream); // Examples: diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java index e2b9e414ad33b..4b01a07b13b21 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java @@ -18,12 +18,19 @@ package org.apache.hadoop.util; +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; import javax.xml.transform.*; +import javax.xml.transform.sax.SAXTransformerFactory; import javax.xml.transform.stream.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.xml.sax.SAXException; + import java.io.*; /** @@ -33,6 +40,19 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class XMLUtils { + + private static final String DISALLOW_DOCTYPE_DECL = + "http://apache.org/xml/features/disallow-doctype-decl"; + private static final String LOAD_EXTERNAL_DECL = + "http://apache.org/xml/features/nonvalidating/load-external-dtd"; + private static final String EXTERNAL_GENERAL_ENTITIES = + "http://xml.org/sax/features/external-general-entities"; + private static final String EXTERNAL_PARAMETER_ENTITIES = + "http://xml.org/sax/features/external-parameter-entities"; + private static final String CREATE_ENTITY_REF_NODES = + "http://apache.org/xml/features/dom/create-entity-ref-nodes"; + + /** * Transform input xml given a stylesheet. * @@ -49,7 +69,7 @@ public static void transform( ) throws TransformerConfigurationException, TransformerException { // Instantiate a TransformerFactory - TransformerFactory tFactory = TransformerFactory.newInstance(); + TransformerFactory tFactory = newSecureTransformerFactory(); // Use the TransformerFactory to process the // stylesheet and generate a Transformer @@ -61,4 +81,78 @@ public static void transform( // and send the output to a Result object. transformer.transform(new StreamSource(xml), new StreamResult(out)); } + + /** + * This method should be used if you need a {@link DocumentBuilderFactory}. Use this method instead + * of {@link DocumentBuilderFactory#newInstance()}. The factory that is returned has secure configuration + * enabled. + * + * @return a {@link DocumentBuilderFactory} with secure configuration enabled + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the secure configuration + */ + public static DocumentBuilderFactory newSecureDocumentBuilderFactory() + throws ParserConfigurationException { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); + dbf.setFeature(LOAD_EXTERNAL_DECL, false); + dbf.setFeature(EXTERNAL_GENERAL_ENTITIES , false); + dbf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); + dbf.setFeature(CREATE_ENTITY_REF_NODES, false); + return dbf; + } + + /** + * This method should be used if you need a {@link SAXParserFactory}. Use this method instead + * of {@link SAXParserFactory#newInstance()}. The factory that is returned has secure configuration + * enabled. + * + * @return a {@link SAXParserFactory} with secure configuration enabled + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the secure configuration + * @throws SAXException if there are another issues when creating the factory + */ + public static SAXParserFactory newSecureSAXParserFactory() + throws SAXException, ParserConfigurationException { + SAXParserFactory spf = SAXParserFactory.newInstance(); + spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + spf.setFeature(DISALLOW_DOCTYPE_DECL, true); + spf.setFeature(LOAD_EXTERNAL_DECL, false); + spf.setFeature(EXTERNAL_GENERAL_ENTITIES , false); + spf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); + return spf; + } + + /** + * This method should be used if you need a {@link TransformerFactory}. Use this method instead + * of {@link TransformerFactory#newInstance()}. The factory that is returned has secure configuration + * enabled. + * + * @return a {@link TransformerFactory} with secure configuration enabled + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not support the secure configuration + */ + public static TransformerFactory newSecureTransformerFactory() + throws TransformerConfigurationException { + TransformerFactory trfactory = TransformerFactory.newInstance(); + trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + return trfactory; + } + + /** + * This method should be used if you need a {@link SAXTransformerFactory}. Use this method instead + * of {@link SAXTransformerFactory#newInstance()}. The factory that is returned has secure configuration + * enabled. + * + * @return a {@link SAXTransformerFactory} with secure configuration enabled + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not support the secure configuration + */ + public static SAXTransformerFactory newSecureSAXTransformerFactory() + throws TransformerConfigurationException { + SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); + trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); + trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + trfactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, ""); + return trfactory; + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java index ada4cd80e4882..f80c62535a1f0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/cli/CLITestHelper.java @@ -24,6 +24,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.XMLUtils; + import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -34,7 +36,6 @@ import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; import java.io.File; import java.util.ArrayList; @@ -76,7 +77,7 @@ protected void readTestConfigFile() { boolean success = false; testConfigFile = TEST_CACHE_DATA_DIR + File.separator + testConfigFile; try { - SAXParser p = (SAXParserFactory.newInstance()).newSAXParser(); + SAXParser p = XMLUtils.newSecureSAXParserFactory().newSAXParser(); p.parse(testConfigFile, getConfigParser()); success = true; } catch (Exception e) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java index 9d7f4255978ad..f95225e984f62 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java @@ -41,6 +41,7 @@ import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.http.HttpServer2; +import org.apache.hadoop.util.XMLUtils; import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; @@ -223,8 +224,7 @@ public void testWriteXml() throws Exception { ConfServlet.writeResponse(getTestConf(), sw, "xml"); String xml = sw.toString(); - DocumentBuilderFactory docBuilderFactory - = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory docBuilderFactory = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); Document doc = builder.parse(new InputSource(new StringReader(xml))); NodeList nameNodes = doc.getElementsByTagName("name"); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java new file mode 100644 index 0000000000000..c3ba9e8150399 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.io.InputStream; +import java.io.StringReader; +import java.io.StringWriter; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.SAXParser; +import javax.xml.transform.Transformer; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.w3c.dom.Document; +import org.xml.sax.InputSource; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +public class TestXMLUtils { + + @Test + public void testSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(""))); + Assertions.assertThat(doc).describedAs("parsed document").isNotNull(); + } + + @Test(expected = SAXException.class) + public void testExternalDtdWithSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) { + Document doc = db.parse(stream); + } + } + + @Test + public void testSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + parser.parse(new InputSource(new StringReader("")), new DefaultHandler()); + } + + @Test(expected = SAXException.class) + public void testExternalDtdWithSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + try (InputStream stream = getResourceStream("/xml/external-dtd.xml")) { + parser.parse(stream, new DefaultHandler()); + } + } + + @Test + public void testSecureTransformerFactory() throws Exception { + Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer(); + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + Document doc = db.parse(new InputSource(new StringReader(""))); + try (StringWriter stringWriter = new StringWriter()) { + transformer.transform(new DOMSource(doc), new StreamResult(stringWriter)); + Assertions.assertThat(stringWriter.toString()).contains(""))); + try (StringWriter stringWriter = new StringWriter()) { + transformer.transform(new DOMSource(doc), new StreamResult(stringWriter)); + Assertions.assertThat(stringWriter.toString()).contains(" + +
+ First Last + Acme + (555) 123-4567 +
\ No newline at end of file diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java index 7e79179721f8a..d4397695686a7 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java @@ -25,6 +25,7 @@ import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import org.apache.hadoop.util.XMLUtils; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -55,7 +56,7 @@ static Properties parse(InputStream input) throws IOException { Properties result = new Properties(); try { - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = dbf.newDocumentBuilder(); diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java index 1d85872c08d7b..4da61d751cbf4 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java @@ -17,28 +17,28 @@ */ package org.apache.hadoop.tools.rumen; +import java.io.StringReader; import java.util.Properties; import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.io.InputStream; -import java.io.ByteArrayInputStream; import java.io.IOException; -import java.nio.charset.Charset; - import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.util.XMLUtils; + import org.w3c.dom.Document; import org.w3c.dom.NodeList; import org.w3c.dom.Node; import org.w3c.dom.Element; import org.w3c.dom.Text; +import org.xml.sax.InputSource; import org.xml.sax.SAXException; class ParsedConfigFile { @@ -46,7 +46,6 @@ class ParsedConfigFile { Pattern.compile("_(job_[0-9]+_[0-9]+)_"); private static final Pattern heapPattern = Pattern.compile("-Xmx([0-9]+)([mMgG])"); - private static final Charset UTF_8 = Charset.forName("UTF-8"); final int heapMegabytes; @@ -103,13 +102,11 @@ private int maybeGetIntValue(String propName, String attr, String value, } try { - InputStream is = new ByteArrayInputStream(xmlString.getBytes(UTF_8)); - - DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory(); DocumentBuilder db = dbf.newDocumentBuilder(); - Document doc = db.parse(is); + Document doc = db.parse(new InputSource(new StringReader(xmlString))); Element root = doc.getDocumentElement(); From b45ba50c7026731b3b00b24371762c1a9282b69a Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 1 Oct 2022 00:38:01 +0100 Subject: [PATCH 2/6] build failure --- .../java/org/apache/hadoop/util/XMLUtils.java | 36 ++++++++++--------- .../org/apache/hadoop/util/TestXMLUtils.java | 5 +-- .../hadoop/tools/rumen/ParsedConfigFile.java | 3 +- 3 files changed, 24 insertions(+), 20 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java index 4b01a07b13b21..dc024c7135b50 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java @@ -83,12 +83,13 @@ public static void transform( } /** - * This method should be used if you need a {@link DocumentBuilderFactory}. Use this method instead - * of {@link DocumentBuilderFactory#newInstance()}. The factory that is returned has secure configuration - * enabled. + * This method should be used if you need a {@link DocumentBuilderFactory}. Use this method + * instead of {@link DocumentBuilderFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. * * @return a {@link DocumentBuilderFactory} with secure configuration enabled - * @throws ParserConfigurationException if the {@code JAXP} parser does not support the secure configuration + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the + * secure configuration */ public static DocumentBuilderFactory newSecureDocumentBuilderFactory() throws ParserConfigurationException { @@ -103,12 +104,13 @@ public static DocumentBuilderFactory newSecureDocumentBuilderFactory() } /** - * This method should be used if you need a {@link SAXParserFactory}. Use this method instead - * of {@link SAXParserFactory#newInstance()}. The factory that is returned has secure configuration - * enabled. + * This method should be used if you need a {@link SAXParserFactory}. Use this method + * instead of {@link SAXParserFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. * * @return a {@link SAXParserFactory} with secure configuration enabled - * @throws ParserConfigurationException if the {@code JAXP} parser does not support the secure configuration + * @throws ParserConfigurationException if the {@code JAXP} parser does not support the + * secure configuration * @throws SAXException if there are another issues when creating the factory */ public static SAXParserFactory newSecureSAXParserFactory() @@ -123,12 +125,13 @@ public static SAXParserFactory newSecureSAXParserFactory() } /** - * This method should be used if you need a {@link TransformerFactory}. Use this method instead - * of {@link TransformerFactory#newInstance()}. The factory that is returned has secure configuration - * enabled. + * This method should be used if you need a {@link TransformerFactory}. Use this method + * instead of {@link TransformerFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. * * @return a {@link TransformerFactory} with secure configuration enabled - * @throws TransformerConfigurationException if the {@code JAXP} transformer does not support the secure configuration + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not + * support the secure configuration */ public static TransformerFactory newSecureTransformerFactory() throws TransformerConfigurationException { @@ -140,12 +143,13 @@ public static TransformerFactory newSecureTransformerFactory() } /** - * This method should be used if you need a {@link SAXTransformerFactory}. Use this method instead - * of {@link SAXTransformerFactory#newInstance()}. The factory that is returned has secure configuration - * enabled. + * This method should be used if you need a {@link SAXTransformerFactory}. Use this method + * instead of {@link SAXTransformerFactory#newInstance()}. The factory that is returned has + * secure configuration enabled. * * @return a {@link SAXTransformerFactory} with secure configuration enabled - * @throws TransformerConfigurationException if the {@code JAXP} transformer does not support the secure configuration + * @throws TransformerConfigurationException if the {@code JAXP} transformer does not + * support the secure configuration */ public static SAXTransformerFactory newSecureSAXTransformerFactory() throws TransformerConfigurationException { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java index c3ba9e8150399..7585c91327194 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -23,6 +23,7 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.SAXParser; import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; @@ -76,7 +77,7 @@ public void testSecureTransformerFactory() throws Exception { } } - @Test(expected = SAXException.class) + @Test(expected = TransformerException.class) public void testExternalDtdWithSecureTransformerFactory() throws Exception { Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer(); try ( @@ -99,7 +100,7 @@ public void testSecureSAXTransformerFactory() throws Exception { } } - @Test(expected = SAXException.class) + @Test(expected = TransformerException.class) public void testExternalDtdWithSecureSAXTransformerFactory() throws Exception { Transformer transformer = XMLUtils.newSecureSAXTransformerFactory().newTransformer(); try ( diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java index 4da61d751cbf4..a6c8bdad87d04 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/ParsedConfigFile.java @@ -17,13 +17,12 @@ */ package org.apache.hadoop.tools.rumen; +import java.io.IOException; import java.io.StringReader; import java.util.Properties; import java.util.regex.Pattern; import java.util.regex.Matcher; -import java.io.IOException; - import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.ParserConfigurationException; From e2050715e5ef04d499da8dc7a90ef3dd59ebe02f Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 1 Oct 2022 11:18:44 +0100 Subject: [PATCH 3/6] more tests --- .../java/org/apache/hadoop/util/XMLUtils.java | 4 +- .../apache/hadoop/conf/TestConfServlet.java | 2 + .../org/apache/hadoop/util/TestXMLUtils.java | 40 ++++++++++++++++++- .../src/test/resources/xml/entity-dtd.xml | 22 ++++++++++ .../src/test/resources/xml/external-dtd.xml | 18 ++++++++- 5 files changed, 81 insertions(+), 5 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/resources/xml/entity-dtd.xml diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java index dc024c7135b50..0b5084f5f785d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/XMLUtils.java @@ -97,7 +97,7 @@ public static DocumentBuilderFactory newSecureDocumentBuilderFactory() dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); dbf.setFeature(DISALLOW_DOCTYPE_DECL, true); dbf.setFeature(LOAD_EXTERNAL_DECL, false); - dbf.setFeature(EXTERNAL_GENERAL_ENTITIES , false); + dbf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); dbf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); dbf.setFeature(CREATE_ENTITY_REF_NODES, false); return dbf; @@ -119,7 +119,7 @@ public static SAXParserFactory newSecureSAXParserFactory() spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); spf.setFeature(DISALLOW_DOCTYPE_DECL, true); spf.setFeature(LOAD_EXTERNAL_DECL, false); - spf.setFeature(EXTERNAL_GENERAL_ENTITIES , false); + spf.setFeature(EXTERNAL_GENERAL_ENTITIES, false); spf.setFeature(EXTERNAL_PARAMETER_ENTITIES, false); return spf; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java index f95225e984f62..6db47d6d22fda 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfServlet.java @@ -42,9 +42,11 @@ import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.util.XMLUtils; + import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; + import static org.mockito.Mockito.when; import static org.mockito.Mockito.mock; import static org.junit.Assert.*; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java index 7585c91327194..0ca56397cc6ef 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -52,6 +52,14 @@ public void testExternalDtdWithSecureDocumentBuilderFactory() throws Exception { } } + @Test(expected = SAXException.class) + public void testEntityDtdWithSecureDocumentBuilderFactory() throws Exception { + DocumentBuilder db = XMLUtils.newSecureDocumentBuilderFactory().newDocumentBuilder(); + try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) { + Document doc = db.parse(stream); + } + } + @Test public void testSecureSAXParserFactory() throws Exception { SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); @@ -66,6 +74,14 @@ public void testExternalDtdWithSecureSAXParserFactory() throws Exception { } } + @Test(expected = SAXException.class) + public void testEntityDtdWithSecureSAXParserFactory() throws Exception { + SAXParser parser = XMLUtils.newSecureSAXParserFactory().newSAXParser(); + try (InputStream stream = getResourceStream("/xml/entity-dtd.xml")) { + parser.parse(stream, new DefaultHandler()); + } + } + @Test public void testSecureTransformerFactory() throws Exception { Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer(); @@ -85,7 +101,17 @@ public void testExternalDtdWithSecureTransformerFactory() throws Exception { StringWriter stringWriter = new StringWriter() ) { transformer.transform(new StreamSource(stream), new StreamResult(stringWriter)); - Assertions.assertThat(stringWriter.toString()).contains(" + + + + ]> +&lol; diff --git a/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml b/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml index b3f096472516c..08a13938f5f76 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/xml/external-dtd.xml @@ -1,7 +1,23 @@ +
First Last Acme (555) 123-4567 -
\ No newline at end of file + From edb2decbe6f8526af588ba6680e57eee789fd789 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 1 Oct 2022 11:23:41 +0100 Subject: [PATCH 4/6] Update JobConfigurationParser.java --- .../org/apache/hadoop/tools/rumen/JobConfigurationParser.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java index d4397695686a7..9cd2f4778fc7f 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JobConfigurationParser.java @@ -26,6 +26,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.hadoop.util.XMLUtils; + import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; From 93a7556c6c833fa84beb0ebbbf7de97589a13f2a Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sat, 1 Oct 2022 15:40:41 +0100 Subject: [PATCH 5/6] remove broken tests --- .../org/apache/hadoop/util/TestXMLUtils.java | 22 ------------------- 1 file changed, 22 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java index 0ca56397cc6ef..b907107346688 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -104,17 +104,6 @@ public void testExternalDtdWithSecureTransformerFactory() throws Exception { } } - @Test(expected = TransformerException.class) - public void testEntityDtdWithSecureTransformerFactory() throws Exception { - Transformer transformer = XMLUtils.newSecureTransformerFactory().newTransformer(); - try ( - InputStream stream = getResourceStream("/xml/entity-dtd.xml"); - StringWriter stringWriter = new StringWriter() - ) { - transformer.transform(new StreamSource(stream), new StreamResult(stringWriter)); - } - } - @Test public void testSecureSAXTransformerFactory() throws Exception { Transformer transformer = XMLUtils.newSecureSAXTransformerFactory().newTransformer(); @@ -137,17 +126,6 @@ public void testExternalDtdWithSecureSAXTransformerFactory() throws Exception { } } - @Test(expected = TransformerException.class) - public void testEntityDtdWithSecureSAXTransformerFactory() throws Exception { - Transformer transformer = XMLUtils.newSecureSAXTransformerFactory().newTransformer(); - try ( - InputStream stream = getResourceStream("/xml/entity-dtd.xml"); - StringWriter stringWriter = new StringWriter() - ) { - transformer.transform(new StreamSource(stream), new StreamResult(stringWriter)); - } - } - private static InputStream getResourceStream(final String filename) { return TestXMLUtils.class.getResourceAsStream(filename); } From e44488d863e24c85ac482f192402e1175097b438 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 6 Oct 2022 12:38:06 +0100 Subject: [PATCH 6/6] test classes should extend AbstractHadoopTestBase --- .../src/test/java/org/apache/hadoop/util/TestXMLUtils.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java index b907107346688..ec1b74338a113 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestXMLUtils.java @@ -28,6 +28,8 @@ import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; +import org.apache.hadoop.test.AbstractHadoopTestBase; + import org.assertj.core.api.Assertions; import org.junit.Test; import org.w3c.dom.Document; @@ -35,7 +37,7 @@ import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; -public class TestXMLUtils { +public class TestXMLUtils extends AbstractHadoopTestBase { @Test public void testSecureDocumentBuilderFactory() throws Exception {