This is an automated email from the ASF dual-hosted git repository. dzamo pushed a commit to branch 1.21 in repository https://gitbox.apache.org/repos/asf/drill.git
commit 055d6ca4bcb47e71e8e20d8e370da4890f1885bb Author: Charles S. Givre <[email protected]> AuthorDate: Sun Nov 5 22:35:14 2023 -0500 DRILL-8461: Prevent XXE Attacks in XML Format Plugin (#2845) --- .../org/apache/drill/exec/store/xml/XMLReader.java | 3 +++ .../apache/drill/exec/store/xml/TestXMLReader.java | 13 ++++++++++ contrib/format-xml/src/test/resources/xml/bad.xml | 29 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java index 8b23ac7621..0f6864b190 100644 --- a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java +++ b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java @@ -99,6 +99,9 @@ public class XMLReader implements Closeable { public XMLReader(InputStream fsStream, int dataLevel) throws XMLStreamException { this.fsStream = fsStream; XMLInputFactory inputFactory = XMLInputFactory.newInstance(); + + // This property prevents XXE attacks by disallowing DTD. + inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); reader = inputFactory.createXMLEventReader(fsStream); fieldNameStack = new Stack<>(); rowWriterStack = new Stack<>(); diff --git a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java index 260fe9c3cb..d546d2e4c9 100644 --- a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java +++ b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java @@ -19,6 +19,7 @@ package org.apache.drill.exec.store.xml; import org.apache.drill.categories.RowSetTest; +import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.types.TypeProtos.DataMode; import org.apache.drill.common.types.TypeProtos.MinorType; import org.apache.drill.exec.physical.rowSet.RowSet; @@ -41,6 +42,8 @@ import static org.apache.drill.test.rowSet.RowSetUtilities.mapArray; import static org.apache.drill.test.rowSet.RowSetUtilities.objArray; import static org.apache.drill.test.rowSet.RowSetUtilities.strArray; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; @Category(RowSetTest.class) public class TestXMLReader extends ClusterTest { @@ -87,6 +90,16 @@ public class TestXMLReader extends ClusterTest { } @Test + public void testXXE() throws Exception { + String sql = "SELECT * FROM cp.`xml/bad.xml`"; + try { + client.queryBuilder().sql(sql).rowSet(); + fail(); + } catch (UserException e) { + assertTrue(e.getMessage().contains("DATA_READ ERROR: Error parsing XML file")); + } + } + public void testSimpleProvidedSchema() throws Exception { String sql = "SELECT * FROM table(cp.`xml/simple_with_datatypes.xml` (type => 'xml', schema " + "=> 'inline=(`int_field` INT, `bigint_field` BIGINT, `float_field` FLOAT, `double_field` DOUBLE, `boolean_field` " + diff --git a/contrib/format-xml/src/test/resources/xml/bad.xml b/contrib/format-xml/src/test/resources/xml/bad.xml new file mode 100644 index 0000000000..94625f362f --- /dev/null +++ b/contrib/format-xml/src/test/resources/xml/bad.xml @@ -0,0 +1,29 @@ +<!-- + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +--> + +<?xml version="1.0" encoding="ISO-8859-1"?> +<!DOCTYPE foo [ + <!ELEMENT foo ANY > + <!ENTITY xxe SYSTEM "/etc/passwd" >] + > +<creds> + <user>&xxe;</user> + <pass>mypass</pass> +</creds> \ No newline at end of file
