Repository: hive
Updated Branches:
  refs/heads/master 212939b1f -> 4b22fcbda


HIVE-18789: Disallow embedded element in UDFXPathUtil (Daniel Dai, reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4b22fcbd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4b22fcbd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4b22fcbd

Branch: refs/heads/master
Commit: 4b22fcbdaf31a2ff8c4bcf6d69ce3dcb8a363b82
Parents: 212939b
Author: Daniel Dai <da...@hortonworks.com>
Authored: Mon Feb 26 15:48:42 2018 -0800
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Mon Feb 26 15:48:42 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/udf/xml/UDFXPathUtil.java    | 31 +++++++++++++++++++-
 .../hive/ql/udf/xml/TestUDFXPathUtil.java       | 31 +++++++++++++++++++-
 2 files changed, 60 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4b22fcbd/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
index fbdd340..756a547 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
@@ -22,7 +22,11 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 
+import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
@@ -38,9 +42,13 @@ import org.xml.sax.InputSource;
  * of this class.
  */
 public class UDFXPathUtil {
+  static final boolean DISABLE_XINCLUDE = true;
+  private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+  private DocumentBuilder builder = null;
   private XPath xpath = XPathFactory.newInstance().newXPath();
   private ReusableStringReader reader = new ReusableStringReader();
   private InputSource inputSource = new InputSource(reader);
+
   private XPathExpression expression = null;
   private String oldPath = null;
 
@@ -66,12 +74,33 @@ public class UDFXPathUtil {
       return null;
     }
 
+    if (builder == null){
+      initializeDocumentBuilderFactory();
+      try {
+        builder = dbf.newDocumentBuilder();
+      } catch (ParserConfigurationException e) {
+        throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e);
+      }
+    }
+
     reader.set(xml);
 
     try {
-      return expression.evaluate(inputSource, qname);
+      return expression.evaluate(builder.parse(inputSource), qname);
     } catch (XPathExpressionException e) {
       throw new RuntimeException ("Invalid expression '" + oldPath + "'", e);
+    } catch (Exception e) {
+      throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
+    }
+  }
+
+  private void initializeDocumentBuilderFactory() {
+
+    dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+    dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+
+    if (DISABLE_XINCLUDE){
+      dbf.setXIncludeAware(false);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/4b22fcbd/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
index 2edcb7d..b28ed63 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
@@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.udf.xml;
 
 import javax.xml.xpath.XPathConstants;
 
+import org.apache.commons.io.FileUtils;
 import org.junit.Test;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 import static org.junit.Assert.*;
 
+import java.io.File;
+
 public class TestUDFXPathUtil {
 
   @Test
@@ -78,5 +81,31 @@ public class TestUDFXPathUtil {
     assertTrue(result instanceof NodeList);
     assertEquals(5, ((NodeList)result).getLength());
   }
-  
+
+  @Test
+  public void testEmbedFailure() throws Exception {
+
+    String secretValue = String.valueOf(Math.random());
+    File tempFile = File.createTempFile("verifyembed", ".tmp");
+    tempFile.deleteOnExit();
+    String fname = tempFile.getAbsolutePath();
+
+    FileUtils.writeStringToFile(tempFile, secretValue);
+
+    String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+        "<!DOCTYPE test [ \n" +
+        "    <!ENTITY embed SYSTEM \"" + fname + "\"> \n" +
+        "]>\n" +
+        "<foo>&embed;</foo>";
+
+    String evaled = null;
+    Exception caught = null;
+    try {
+      evaled = new UDFXPathUtil().evalString(xml, "/foo");
+    } catch (Exception e){
+      caught = e;
+    }
+    assertTrue(caught.getCause().getMessage()
+      .contains("\'file\' access is not allowed due to restriction set by the accessExternalDTD property"));
+  }
 }

Reply via email to