HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/84a2235c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/84a2235c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/84a2235c

Branch: refs/heads/parquet
Commit: 84a2235c5590944120bb9319565f5e113b6275a1
Parents: bf7810a
Author: Alexander Pivovarov <apivova...@gmail.com>
Authored: Wed May 6 20:13:22 2015 -0700
Committer: Alexander Pivovarov <apivova...@gmail.com>
Committed: Fri May 15 10:23:27 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  3 +
 .../org/apache/hadoop/hive/ql/udf/UDFSha1.java  | 88 ++++++++++++++++++++
 .../apache/hadoop/hive/ql/udf/TestUDFSha1.java  | 57 +++++++++++++
 ql/src/test/queries/clientpositive/udf_sha1.q   | 13 +++
 .../results/clientpositive/show_functions.q.out |  2 +
 .../test/results/clientpositive/udf_sha1.q.out  | 61 ++++++++++++++
 6 files changed, 224 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 02a604f..606185c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace;
 import org.apache.hadoop.hive.ql.udf.UDFRepeat;
 import org.apache.hadoop.hive.ql.udf.UDFReverse;
 import org.apache.hadoop.hive.ql.udf.UDFSecond;
+import org.apache.hadoop.hive.ql.udf.UDFSha1;
 import org.apache.hadoop.hive.ql.udf.UDFSign;
 import org.apache.hadoop.hive.ql.udf.UDFSin;
 import org.apache.hadoop.hive.ql.udf.UDFSpace;
@@ -226,6 +227,8 @@ public final class FunctionRegistry {
     system.registerUDF("base64", UDFBase64.class, false);
     system.registerUDF("unbase64", UDFUnbase64.class, false);
     system.registerUDF("md5", UDFMd5.class, false);
+    system.registerUDF("sha1", UDFSha1.class, false);
+    system.registerUDF("sha", UDFSha1.class, false);
 
     system.registerGenericUDF("encode", GenericUDFEncode.class);
     system.registerGenericUDF("decode", GenericUDFDecode.class);

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
new file mode 100644
index 0000000..04e6f81
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFSha1 - computes the SHA-1 digest of a string or binary value and returns it as hex text.
+ * Each instance holds one MessageDigest and one Text result, reused by every evaluate() call.
+ */
+@Description(name = "sha1,sha",
+    value = "_FUNC_(str or bin) - Calculates the SHA-1 digest for string or 
binary "
+    + "and returns the value as a hex string.",
+    extended = "Example:\n"
+    + "  > SELECT _FUNC_('ABC');\n"
+    + "  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'\n"
+    + "  > SELECT _FUNC_(binary('ABC'));\n"
+    + "  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'")
+public class UDFSha1 extends UDF {
+
+  private final Text result = new Text(); // reused as the return value of every evaluate() call
+  private final MessageDigest digest; // reset at the start of every evaluate() call
+
+  public UDFSha1() {
+    try {
+      digest = MessageDigest.getInstance("SHA"); // yields the SHA-1 values shown in @Description
+    } catch (NoSuchAlgorithmException e) {
+      throw new RuntimeException(e); // rethrown unchecked, original exception kept as the cause
+    }
+  }
+
+  /**
+   * Hashes the first getLength() bytes of n with SHA-1; returns the hex digest, or null if n is null.
+   */
+  public Text evaluate(Text n) {
+    if (n == null) {
+      return null;
+    }
+
+    digest.reset();
+    digest.update(n.getBytes(), 0, n.getLength()); // only the first getLength() bytes are hashed
+    byte[] shaBytes = digest.digest();
+    String shaHex = Hex.encodeHexString(shaBytes);
+
+    result.set(shaHex);
+    return result; // shared instance: overwritten by the next evaluate() call
+  }
+
+  /**
+   * Hashes the first getLength() bytes of b with SHA-1; returns the hex digest, or null if b is null.
+   */
+  public Text evaluate(BytesWritable b) {
+    if (b == null) {
+      return null;
+    }
+
+    digest.reset();
+    digest.update(b.getBytes(), 0, b.getLength()); // only the first getLength() bytes are hashed
+    byte[] shaBytes = digest.digest();
+    String shaHex = Hex.encodeHexString(shaBytes);
+
+    result.set(shaHex);
+    return result; // shared instance: overwritten by the next evaluate() call
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java
new file mode 100644
index 0000000..fefec92
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+public class TestUDFSha1 extends TestCase {
+
+  public void testSha1Str() throws HiveException {
+    UDFSha1 udf = new UDFSha1(); // one instance serves all three cases below
+
+    runAndVerifyStr("ABC", "3c01bdbb26f358bab27f267924aa2c9a03fcfdb8", udf);
+    runAndVerifyStr("", "da39a3ee5e6b4b0d3255bfef95601890afd80709", udf); // empty string
+    // a null input is expected to produce a null result
+    runAndVerifyStr(null, null, udf);
+  }
+
+  public void testSha1Bin() throws HiveException {
+    UDFSha1 udf = new UDFSha1(); // one instance serves all three cases below
+
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, 
"3c01bdbb26f358bab27f267924aa2c9a03fcfdb8", udf); // 65, 66, 67 = ASCII "ABC"
+    runAndVerifyBin(new byte[0], "da39a3ee5e6b4b0d3255bfef95601890afd80709", 
udf); // empty byte array
+    // a null input is expected to produce a null result
+    runAndVerifyBin(null, null, udf);
+  }
+
+  private void runAndVerifyStr(String str, String expResult, UDFSha1 udf) 
throws HiveException {
+    Text t = str != null ? new Text(str) : null; // null is passed through unwrapped
+    Text output = (Text) udf.evaluate(t);
+    assertEquals("sha1() test ", expResult, output != null ? output.toString() 
: null);
+  }
+
+  private void runAndVerifyBin(byte[] binV, String expResult, UDFSha1 udf) 
throws HiveException {
+    BytesWritable binWr = binV != null ? new BytesWritable(binV) : null; // null passed unwrapped
+    Text output = (Text) udf.evaluate(binWr);
+    assertEquals("sha1() test ", expResult, output != null ? output.toString() 
: null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/queries/clientpositive/udf_sha1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_sha1.q b/ql/src/test/queries/clientpositive/udf_sha1.q
new file mode 100644
index 0000000..0bfa7ec
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_sha1.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION sha1;
+DESC FUNCTION EXTENDED sha;
+
+explain select sha1('ABC');
+
+select
+sha1('ABC'),
+sha(''),
+sha(binary('ABC')),
+sha1(binary('')),
+sha1(cast(null as string)),
+sha(cast(null as binary)),
+sha1(null);

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index a422760..0948154 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -170,6 +170,8 @@ rpad
 rtrim
 second
 sentences
+sha
+sha1
 shiftleft
 shiftright
 shiftrightunsigned

http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/results/clientpositive/udf_sha1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_sha1.q.out b/ql/src/test/results/clientpositive/udf_sha1.q.out
new file mode 100644
index 0000000..ea7c805
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_sha1.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: DESCRIBE FUNCTION sha1
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION sha1
+POSTHOOK: type: DESCFUNCTION
+sha1(str or bin) - Calculates the SHA-1 digest for string or binary and returns the value as a hex string.
+PREHOOK: query: DESC FUNCTION EXTENDED sha
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED sha
+POSTHOOK: type: DESCFUNCTION
+sha(str or bin) - Calculates the SHA-1 digest for string or binary and returns the value as a hex string.
+Synonyms: sha1
+Example:
+  > SELECT sha('ABC');
+  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'
+  > SELECT sha(binary('ABC'));
+  '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'
+PREHOOK: query: explain select sha1('ABC')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select sha1('ABC')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+          Select Operator
+            expressions: '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8' (type: string)
+            outputColumnNames: _col0
+            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: select
+sha1('ABC'),
+sha(''),
+sha(binary('ABC')),
+sha1(binary('')),
+sha1(cast(null as string)),
+sha(cast(null as binary)),
+sha1(null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+sha1('ABC'),
+sha(''),
+sha(binary('ABC')),
+sha1(binary('')),
+sha1(cast(null as string)),
+sha(cast(null as binary)),
+sha1(null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+3c01bdbb26f358bab27f267924aa2c9a03fcfdb8       
da39a3ee5e6b4b0d3255bfef95601890afd80709        
3c01bdbb26f358bab27f267924aa2c9a03fcfdb8        
da39a3ee5e6b4b0d3255bfef95601890afd80709        NULL    NULL    NULL

Reply via email to