Repository: hive Updated Branches: refs/heads/master 212939b1f -> 4b22fcbda
HIVE-18789: Disallow embedded element in UDFXPathUtil (Daniel Dai, reviewed by Thejas Nair) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4b22fcbd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4b22fcbd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4b22fcbd Branch: refs/heads/master Commit: 4b22fcbdaf31a2ff8c4bcf6d69ce3dcb8a363b82 Parents: 212939b Author: Daniel Dai <da...@hortonworks.com> Authored: Mon Feb 26 15:48:42 2018 -0800 Committer: Daniel Dai <da...@hortonworks.com> Committed: Mon Feb 26 15:48:42 2018 -0800 ---------------------------------------------------------------------- .../hadoop/hive/ql/udf/xml/UDFXPathUtil.java | 31 +++++++++++++++++++- .../hive/ql/udf/xml/TestUDFXPathUtil.java | 31 +++++++++++++++++++- 2 files changed, 60 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4b22fcbd/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java index fbdd340..756a547 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java @@ -22,7 +22,11 @@ import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import javax.xml.XMLConstants; import javax.xml.namespace.QName; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; @@ -38,9 +42,13 @@ import org.xml.sax.InputSource; * of this class. */ public class UDFXPathUtil { + static final boolean DISABLE_XINCLUDE = true; + private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + private DocumentBuilder builder = null; private XPath xpath = XPathFactory.newInstance().newXPath(); private ReusableStringReader reader = new ReusableStringReader(); private InputSource inputSource = new InputSource(reader); + private XPathExpression expression = null; private String oldPath = null; @@ -66,12 +74,33 @@ public class UDFXPathUtil { return null; } + if (builder == null){ + initializeDocumentBuilderFactory(); + try { + builder = dbf.newDocumentBuilder(); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e); + } + } + reader.set(xml); try { - return expression.evaluate(inputSource, qname); + return expression.evaluate(builder.parse(inputSource), qname); } catch (XPathExpressionException e) { throw new RuntimeException ("Invalid expression '" + oldPath + "'", e); + } catch (Exception e) { + throw new RuntimeException("Error loading expression '" + oldPath + "'", e); + } + } + + private void initializeDocumentBuilderFactory() { + + dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); + dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + + if (DISABLE_XINCLUDE){ + dbf.setXIncludeAware(false); } } http://git-wip-us.apache.org/repos/asf/hive/blob/4b22fcbd/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java index 2edcb7d..b28ed63 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java @@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.udf.xml; import javax.xml.xpath.XPathConstants; +import org.apache.commons.io.FileUtils; import org.junit.Test; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import static org.junit.Assert.*; +import java.io.File; + public class TestUDFXPathUtil { @Test @@ -78,5 +81,31 @@ public class TestUDFXPathUtil { assertTrue(result instanceof NodeList); assertEquals(5, ((NodeList)result).getLength()); } - + + @Test + public void testEmbedFailure() throws Exception { + + String secretValue = String.valueOf(Math.random()); + File tempFile = File.createTempFile("verifyembed", ".tmp"); + tempFile.deleteOnExit(); + String fname = tempFile.getAbsolutePath(); + + FileUtils.writeStringToFile(tempFile, secretValue); + + String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + + "<!DOCTYPE test [ \n" + + " <!ENTITY embed SYSTEM \"" + fname + "\"> \n" + + "]>\n" + + "<foo>&embed;</foo>"; + + String evaled = null; + Exception caught = null; + try { + evaled = new UDFXPathUtil().evalString(xml, "/foo"); + } catch (Exception e){ + caught = e; + } + assertTrue(caught.getCause().getMessage() + .contains("\'file\' access is not allowed due to restriction set by the accessExternalDTD property")); + } }