HIVE-12531 : Implement fast-path for Year/Month UDFs for dates between 1999 and 2038 (Jason Dere via Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e384b2b6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e384b2b6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e384b2b6 Branch: refs/heads/llap Commit: e384b2b657c819d5963b8f76222f78bb479a29a2 Parents: b75d9ea Author: Jason Dere <jd...@hortonworks.com> Authored: Wed Dec 9 11:48:00 2015 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Fri Mar 25 07:21:55 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/ql/udf/UDFMonth.java | 16 ++++++++-------- .../java/org/apache/hadoop/hive/ql/udf/UDFYear.java | 16 ++++++++-------- .../expressions/TestVectorDateExpressions.java | 13 ++++++++++--- 3 files changed, 26 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java index 8c2b0e4..05afb8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMonth.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; import java.util.Date; +import org.joda.time.MutableDateTime; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; @@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text; @NDV(maxNdv = 31) public class UDFMonth extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private final Calendar calendar = Calendar.getInstance(); + private transient final MutableDateTime mdt = new MutableDateTime(); private final IntWritable result = new IntWritable(); @@ -75,8 +75,8 @@ public class UDFMonth extends UDF { } try { Date date = formatter.parse(dateString.toString()); - calendar.setTime(date); - result.set(1 + calendar.get(Calendar.MONTH)); + mdt.setMillis(date.getTime()); + result.set(mdt.getMonthOfYear()); return result; } catch (ParseException e) { return null; @@ -88,8 +88,8 @@ public class UDFMonth extends UDF { return null; } - calendar.setTime(d.get()); - result.set(1 + calendar.get(Calendar.MONTH)); + mdt.setMillis(d.get().getTime()); + result.set(mdt.getMonthOfYear()); return result; } @@ -98,8 +98,8 @@ public class UDFMonth extends UDF { return null; } - calendar.setTime(t.getTimestamp()); - result.set(1 + calendar.get(Calendar.MONTH)); + mdt.setMillis(t.getTimestamp().getTime()); + result.set(mdt.getMonthOfYear()); return result; } http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java index d7ecd8c..fb3a655 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFYear.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hive.ql.udf; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.Calendar; import java.util.Date; +import org.joda.time.MutableDateTime; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; @@ -53,7 +53,7 @@ import org.apache.hadoop.io.Text; @NDV(maxNdv = 20) // although technically its unbounded, its unlikely we will ever see ndv > 20 public class UDFYear extends UDF { private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private final Calendar calendar = Calendar.getInstance(); + private transient final MutableDateTime mdt = new MutableDateTime(); private final IntWritable result = new IntWritable(); @@ -77,8 +77,8 @@ public class UDFYear extends UDF { try { Date date = formatter.parse(dateString.toString()); - calendar.setTime(date); - result.set(calendar.get(Calendar.YEAR)); + mdt.setMillis(date.getTime()); + result.set(mdt.getYear()); return result; } catch (ParseException e) { return null; @@ -90,8 +90,8 @@ public class UDFYear extends UDF { return null; } - calendar.setTime(d.get()); - result.set(calendar.get(Calendar.YEAR)); + mdt.setMillis(d.get().getTime()); + result.set(mdt.getYear()); return result; } @@ -100,8 +100,8 @@ public class UDFYear extends UDF { return null; } - calendar.setTime(t.getTimestamp()); - result.set(calendar.get(Calendar.YEAR)); + mdt.setMillis(t.getTimestamp().getTime()); + result.set(mdt.getYear()); return result; } http://git-wip-us.apache.org/repos/asf/hive/blob/e384b2b6/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java index 58cecc1..61c96e9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorDateExpressions.java @@ -38,7 +38,9 @@ import org.junit.Test; import org.junit.internal.runners.statements.Fail; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import com.sun.tools.javac.resources.javac; +import java.sql.Date; import java.sql.Timestamp; import java.util.ArrayList; import java.util.Calendar; @@ -53,6 +55,7 @@ import java.util.concurrent.ThreadFactory; public class TestVectorDateExpressions { private ExecutorService runner; + private static final int MAX_SANE_DATE_VALUE = new DateWritable(Date.valueOf("3000-01-01")).getDays(); /* copied over from VectorUDFTimestampFieldLong */ private TimestampWritable toTimestampWritable(long daysSinceEpoch) { @@ -78,11 +81,15 @@ public class TestVectorDateExpressions { } private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size) { + return getVectorizedRandomRowBatch(seed, size, Integer.MAX_VALUE); + } + + private VectorizedRowBatch getVectorizedRandomRowBatch(int seed, int size, int maxValue) { VectorizedRowBatch batch = new VectorizedRowBatch(2, size); LongColumnVector lcv = new LongColumnVector(size); Random rand = new Random(seed); for (int i = 0; i < size; i++) { - lcv.vector[i] = (rand.nextInt()); + lcv.vector[i] = (rand.nextInt(maxValue)); } batch.cols[0] = lcv; batch.cols[1] = new LongColumnVector(size); @@ -159,7 +166,7 @@ public class TestVectorDateExpressions { batch.cols[0].isNull[0] = true; verifyUDFYear(batch); - batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE); verifyUDFYear(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFYear(batch); @@ -283,7 +290,7 @@ public class TestVectorDateExpressions { batch.cols[0].isNull[0] = true; verifyUDFMonth(batch); - batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE); + batch = getVectorizedRandomRowBatch(200, VectorizedRowBatch.DEFAULT_SIZE, MAX_SANE_DATE_VALUE); verifyUDFMonth(batch); TestVectorizedRowBatch.addRandomNulls(batch.cols[0]); verifyUDFMonth(batch);