HIVE-10639 create SHA1 UDF (Alexander Pivovarov, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/84a2235c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/84a2235c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/84a2235c Branch: refs/heads/parquet Commit: 84a2235c5590944120bb9319565f5e113b6275a1 Parents: bf7810a Author: Alexander Pivovarov <apivova...@gmail.com> Authored: Wed May 6 20:13:22 2015 -0700 Committer: Alexander Pivovarov <apivova...@gmail.com> Committed: Fri May 15 10:23:27 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FunctionRegistry.java | 3 + .../org/apache/hadoop/hive/ql/udf/UDFSha1.java | 88 ++++++++++++++++++++ .../apache/hadoop/hive/ql/udf/TestUDFSha1.java | 57 +++++++++++++ ql/src/test/queries/clientpositive/udf_sha1.q | 13 +++ .../results/clientpositive/show_functions.q.out | 2 + .../test/results/clientpositive/udf_sha1.q.out | 61 ++++++++++++++ 6 files changed, 224 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 02a604f..606185c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -86,6 +86,7 @@ import org.apache.hadoop.hive.ql.udf.UDFRegExpReplace; import org.apache.hadoop.hive.ql.udf.UDFRepeat; import org.apache.hadoop.hive.ql.udf.UDFReverse; import org.apache.hadoop.hive.ql.udf.UDFSecond; +import org.apache.hadoop.hive.ql.udf.UDFSha1; import org.apache.hadoop.hive.ql.udf.UDFSign; import org.apache.hadoop.hive.ql.udf.UDFSin; import org.apache.hadoop.hive.ql.udf.UDFSpace; @@ -226,6 +227,8 @@ public final class FunctionRegistry { system.registerUDF("base64", UDFBase64.class, false); system.registerUDF("unbase64", UDFUnbase64.class, false); system.registerUDF("md5", UDFMd5.class, false); + system.registerUDF("sha1", UDFSha1.class, false); + system.registerUDF("sha", UDFSha1.class, false); system.registerGenericUDF("encode", GenericUDFEncode.class); system.registerGenericUDF("decode", GenericUDFDecode.class); http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java new file mode 100644 index 0000000..04e6f81 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSha1.java @@ -0,0 +1,88 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.apache.commons.codec.binary.Hex; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; + +/** + * UDFSha. + * + */ +@Description(name = "sha1,sha", + value = "_FUNC_(str or bin) - Calculates the SHA-1 digest for string or binary " + + "and returns the value as a hex string.", + extended = "Example:\n" + + " > SELECT _FUNC_('ABC');\n" + + " '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'\n" + + " > SELECT _FUNC_(binary('ABC'));\n" + + " '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8'") +public class UDFSha1 extends UDF { + + private final Text result = new Text(); + private final MessageDigest digest; + + public UDFSha1() { + try { + digest = MessageDigest.getInstance("SHA"); + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } + } + + /** + * Convert String to SHA-1 + */ + public Text evaluate(Text n) { + if (n == null) { + return null; + } + + digest.reset(); + digest.update(n.getBytes(), 0, n.getLength()); + byte[] shaBytes = digest.digest(); + String shaHex = Hex.encodeHexString(shaBytes); + + result.set(shaHex); + return result; + } + + /** + * Convert bytes to SHA-1 + */ + public Text evaluate(BytesWritable b) { + if (b == null) { + return null; + } + + digest.reset(); + digest.update(b.getBytes(), 0, b.getLength()); + byte[] shaBytes = digest.digest(); + String shaHex = Hex.encodeHexString(shaBytes); + + result.set(shaHex); + return result; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java new file mode 100644 index 0000000..fefec92 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFSha1.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Text; + +public class TestUDFSha1 extends TestCase { + + public void testSha1Str() throws HiveException { + UDFSha1 udf = new UDFSha1(); + + runAndVerifyStr("ABC", "3c01bdbb26f358bab27f267924aa2c9a03fcfdb8", udf); + runAndVerifyStr("", "da39a3ee5e6b4b0d3255bfef95601890afd80709", udf); + // null + runAndVerifyStr(null, null, udf); + } + + public void testSha1Bin() throws HiveException { + UDFSha1 udf = new UDFSha1(); + + runAndVerifyBin(new byte[] { 65, 66, 67 }, "3c01bdbb26f358bab27f267924aa2c9a03fcfdb8", udf); + runAndVerifyBin(new byte[0], "da39a3ee5e6b4b0d3255bfef95601890afd80709", udf); + // null + runAndVerifyBin(null, null, udf); + } + + private void runAndVerifyStr(String str, String expResult, UDFSha1 udf) throws HiveException { + Text t = str != null ? new Text(str) : null; + Text output = (Text) udf.evaluate(t); + assertEquals("sha1() test ", expResult, output != null ? output.toString() : null); + } + + private void runAndVerifyBin(byte[] binV, String expResult, UDFSha1 udf) throws HiveException { + BytesWritable binWr = binV != null ? new BytesWritable(binV) : null; + Text output = (Text) udf.evaluate(binWr); + assertEquals("sha1() test ", expResult, output != null ? output.toString() : null); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/queries/clientpositive/udf_sha1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udf_sha1.q b/ql/src/test/queries/clientpositive/udf_sha1.q new file mode 100644 index 0000000..0bfa7ec --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_sha1.q @@ -0,0 +1,13 @@ +DESCRIBE FUNCTION sha1; +DESC FUNCTION EXTENDED sha; + +explain select sha1('ABC'); + +select +sha1('ABC'), +sha(''), +sha(binary('ABC')), +sha1(binary('')), +sha1(cast(null as string)), +sha(cast(null as binary)), +sha1(null); http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/results/clientpositive/show_functions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index a422760..0948154 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -170,6 +170,8 @@ rpad rtrim second sentences +sha +sha1 shiftleft shiftright shiftrightunsigned http://git-wip-us.apache.org/repos/asf/hive/blob/84a2235c/ql/src/test/results/clientpositive/udf_sha1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udf_sha1.q.out b/ql/src/test/results/clientpositive/udf_sha1.q.out new file mode 100644 index 0000000..ea7c805 --- /dev/null +++ b/ql/src/test/results/clientpositive/udf_sha1.q.out @@ -0,0 +1,61 @@ +PREHOOK: query: DESCRIBE FUNCTION sha1 +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION sha1 +POSTHOOK: type: DESCFUNCTION +sha1(str or bin) - Calculates the SHA-1 digest for string or binary and returns the value as a hex string. +PREHOOK: query: DESC FUNCTION EXTENDED sha +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESC FUNCTION EXTENDED sha +POSTHOOK: type: DESCFUNCTION +sha(str or bin) - Calculates the SHA-1 digest for string or binary and returns the value as a hex string. +Synonyms: sha1 +Example: + > SELECT sha('ABC'); + '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8' + > SELECT sha(binary('ABC')); + '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8' +PREHOOK: query: explain select sha1('ABC') +PREHOOK: type: QUERY +POSTHOOK: query: explain select sha1('ABC') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: '3c01bdbb26f358bab27f267924aa2c9a03fcfdb8' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE + ListSink + +PREHOOK: query: select +sha1('ABC'), +sha(''), +sha(binary('ABC')), +sha1(binary('')), +sha1(cast(null as string)), +sha(cast(null as binary)), +sha1(null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select +sha1('ABC'), +sha(''), +sha(binary('ABC')), +sha1(binary('')), +sha1(cast(null as string)), +sha(cast(null as binary)), +sha1(null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +3c01bdbb26f358bab27f267924aa2c9a03fcfdb8 da39a3ee5e6b4b0d3255bfef95601890afd80709 3c01bdbb26f358bab27f267924aa2c9a03fcfdb8 da39a3ee5e6b4b0d3255bfef95601890afd80709 NULL NULL NULL