This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch 1.21
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 055d6ca4bcb47e71e8e20d8e370da4890f1885bb
Author: Charles S. Givre <[email protected]>
AuthorDate: Sun Nov 5 22:35:14 2023 -0500

    DRILL-8461: Prevent XXE Attacks in XML Format Plugin (#2845)
---
 .../org/apache/drill/exec/store/xml/XMLReader.java |  3 +++
 .../apache/drill/exec/store/xml/TestXMLReader.java | 13 ++++++++++
 contrib/format-xml/src/test/resources/xml/bad.xml  | 29 ++++++++++++++++++++++
 3 files changed, 45 insertions(+)

diff --git 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
index 8b23ac7621..0f6864b190 100644
--- 
a/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
+++ 
b/contrib/format-xml/src/main/java/org/apache/drill/exec/store/xml/XMLReader.java
@@ -99,6 +99,9 @@ public class XMLReader implements Closeable {
   public XMLReader(InputStream fsStream, int dataLevel) throws 
XMLStreamException {
     this.fsStream = fsStream;
     XMLInputFactory inputFactory = XMLInputFactory.newInstance();
+
+    // This property prevents XXE attacks by disallowing DTD.
+    inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
     reader = inputFactory.createXMLEventReader(fsStream);
     fieldNameStack = new Stack<>();
     rowWriterStack = new Stack<>();
diff --git 
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
index 260fe9c3cb..d546d2e4c9 100644
--- 
a/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
+++ 
b/contrib/format-xml/src/test/java/org/apache/drill/exec/store/xml/TestXMLReader.java
@@ -19,6 +19,7 @@
 package org.apache.drill.exec.store.xml;
 
 import org.apache.drill.categories.RowSetTest;
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.physical.rowSet.RowSet;
@@ -41,6 +42,8 @@ import static 
org.apache.drill.test.rowSet.RowSetUtilities.mapArray;
 import static org.apache.drill.test.rowSet.RowSetUtilities.objArray;
 import static org.apache.drill.test.rowSet.RowSetUtilities.strArray;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 @Category(RowSetTest.class)
 public class TestXMLReader extends ClusterTest {
@@ -87,6 +90,16 @@ public class TestXMLReader extends ClusterTest {
   }
 
   @Test
+  public void testXXE() throws Exception {
+    String sql = "SELECT * FROM cp.`xml/bad.xml`";
+    try {
+      client.queryBuilder().sql(sql).rowSet();
+      fail();
+    } catch (UserException e) {
+       assertTrue(e.getMessage().contains("DATA_READ ERROR: Error parsing XML 
file"));
+    }
+  }
+
   public void testSimpleProvidedSchema() throws Exception {
     String sql = "SELECT * FROM table(cp.`xml/simple_with_datatypes.xml` (type 
=> 'xml', schema " +
       "=> 'inline=(`int_field` INT, `bigint_field` BIGINT, `float_field` 
FLOAT, `double_field` DOUBLE, `boolean_field` " +
diff --git a/contrib/format-xml/src/test/resources/xml/bad.xml 
b/contrib/format-xml/src/test/resources/xml/bad.xml
new file mode 100644
index 0000000000..94625f362f
--- /dev/null
+++ b/contrib/format-xml/src/test/resources/xml/bad.xml
@@ -0,0 +1,29 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+-->
+
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!DOCTYPE foo [
+        <!ELEMENT foo ANY >
+        <!ENTITY xxe SYSTEM "/etc/passwd" >]
+        >
+<creds>
+    <user>&xxe;</user>
+    <pass>mypass</pass>
+</creds>
\ No newline at end of file

Reply via email to