Repository: hive Updated Branches: refs/heads/master b1fffd5a8 -> 1e97b1618
HIVE-11771: Parquet timestamp conversion errors (Jimmy, reviewed by Szehon) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1e97b161 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1e97b161 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1e97b161 Branch: refs/heads/master Commit: 1e97b16181941f8c21684f4b7a4958b890ef7738 Parents: b1fffd5 Author: Jimmy Xiang <jxi...@cloudera.com> Authored: Wed Sep 9 13:26:06 2015 -0700 Committer: Jimmy Xiang <jxi...@cloudera.com> Committed: Sat Sep 12 14:43:14 2015 -0700 ---------------------------------------------------------------------- .../ql/io/parquet/timestamp/NanoTimeUtils.java | 23 +++++++++--- .../serde/TestParquetTimestampUtils.java | 38 +++++++++++++++++++- 2 files changed, 56 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java index 59c9b4a..aace48e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java @@ -15,6 +15,7 @@ package org.apache.hadoop.hive.ql.io.parquet.timestamp; import java.sql.Timestamp; import java.util.Calendar; +import java.util.GregorianCalendar; import java.util.TimeZone; import java.util.concurrent.TimeUnit; @@ -28,6 +29,7 @@ public class NanoTimeUtils { static final long NANOS_PER_HOUR = TimeUnit.HOURS.toNanos(1); static final long NANOS_PER_MINUTE = TimeUnit.MINUTES.toNanos(1); static final long NANOS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); + static final long NANOS_PER_DAY = TimeUnit.DAYS.toNanos(1); private static final ThreadLocal<Calendar> parquetGMTCalendar = new ThreadLocal<Calendar>(); private static final ThreadLocal<Calendar> parquetLocalCalendar = new ThreadLocal<Calendar>(); @@ -48,14 +50,20 @@ public class NanoTimeUtils { } private static Calendar getCalendar(boolean skipConversion) { - return skipConversion ? getLocalCalendar() : getGMTCalendar(); + Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar(); + calendar.clear(); // Reset all fields before reusing this instance + return calendar; } public static NanoTime getNanoTime(Timestamp ts, boolean skipConversion) { Calendar calendar = getCalendar(skipConversion); calendar.setTime(ts); - JDateTime jDateTime = new JDateTime(calendar.get(Calendar.YEAR), + int year = calendar.get(Calendar.YEAR); + if (calendar.get(Calendar.ERA) == GregorianCalendar.BC) { + year = 1 - year; + } + JDateTime jDateTime = new JDateTime(year, calendar.get(Calendar.MONTH) + 1, //java calendar index starting at 1. calendar.get(Calendar.DAY_OF_MONTH)); int days = jDateTime.getJulianDayNumber(); @@ -74,13 +82,20 @@ public class NanoTimeUtils { int julianDay = nt.getJulianDay(); long nanosOfDay = nt.getTimeOfDayNanos(); + long remainder = nanosOfDay; + julianDay += remainder / NANOS_PER_DAY; + remainder %= NANOS_PER_DAY; + if (remainder < 0) { + remainder += NANOS_PER_DAY; + julianDay--; + } + JDateTime jDateTime = new JDateTime((double) julianDay); Calendar calendar = getCalendar(skipConversion); calendar.set(Calendar.YEAR, jDateTime.getYear()); - calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calender index starting at 1. + calendar.set(Calendar.MONTH, jDateTime.getMonth() - 1); //java calendar index starting at 1. calendar.set(Calendar.DAY_OF_MONTH, jDateTime.getDay()); - long remainder = nanosOfDay; int hour = (int) (remainder / (NANOS_PER_HOUR)); remainder = remainder % (NANOS_PER_HOUR); int minutes = (int) (remainder / (NANOS_PER_MINUTE)); http://git-wip-us.apache.org/repos/asf/hive/blob/1e97b161/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java index 510ffd1..ec6def5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetTimestampUtils.java @@ -16,7 +16,9 @@ package org.apache.hadoop.hive.ql.io.parquet.serde; import java.sql.Timestamp; import java.util.Calendar; import java.util.Date; +import java.util.GregorianCalendar; import java.util.TimeZone; +import java.util.concurrent.TimeUnit; import junit.framework.Assert; import junit.framework.TestCase; @@ -74,7 +76,36 @@ public class TestParquetTimestampUtils extends TestCase { Timestamp ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); Assert.assertEquals(ts2Fetched, ts2); Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 30); - } + + //check if 1464305 Julian Days between Jan 1, 2005 BC and Jan 31, 2005. + cal1 = Calendar.getInstance(); + cal1.set(Calendar.ERA, GregorianCalendar.BC); + cal1.set(Calendar.YEAR, 2005); + cal1.set(Calendar.MONTH, Calendar.JANUARY); + cal1.set(Calendar.DAY_OF_MONTH, 1); + cal1.set(Calendar.HOUR_OF_DAY, 0); + cal1.setTimeZone(TimeZone.getTimeZone("GMT")); + + ts1 = new Timestamp(cal1.getTimeInMillis()); + nt1 = NanoTimeUtils.getNanoTime(ts1, false); + + ts1Fetched = NanoTimeUtils.getTimestamp(nt1, false); + Assert.assertEquals(ts1Fetched, ts1); + + cal2 = Calendar.getInstance(); + cal2.set(Calendar.YEAR, 2005); + cal2.set(Calendar.MONTH, Calendar.JANUARY); + cal2.set(Calendar.DAY_OF_MONTH, 31); + cal2.set(Calendar.HOUR_OF_DAY, 0); + cal2.setTimeZone(TimeZone.getTimeZone("UTC")); + + ts2 = new Timestamp(cal2.getTimeInMillis()); + nt2 = NanoTimeUtils.getNanoTime(ts2, false); + + ts2Fetched = NanoTimeUtils.getTimestamp(nt2, false); + Assert.assertEquals(ts2Fetched, ts2); + Assert.assertEquals(nt2.getJulianDay() - nt1.getJulianDay(), 1464305); +} public void testNanos() { //case 1: 01:01:01.0000000001 @@ -136,6 +167,11 @@ public class TestParquetTimestampUtils extends TestCase { NanoTime n1 = NanoTimeUtils.getNanoTime(ts1, false); Assert.assertEquals(n2.getTimeOfDayNanos() - n1.getTimeOfDayNanos(), 600000000009L); + + NanoTime n3 = new NanoTime(n1.getJulianDay() - 1, n1.getTimeOfDayNanos() + TimeUnit.DAYS.toNanos(1)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); + n3 = new NanoTime(n1.getJulianDay() + 3, n1.getTimeOfDayNanos() - TimeUnit.DAYS.toNanos(3)); + Assert.assertEquals(ts1, NanoTimeUtils.getTimestamp(n3, false)); } public void testTimezone() {