This is an automated email from the ASF dual-hosted git repository.
samarth pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 82e5b05 Number based columns representing time in custom format
cannot be used as timestamp column in Druid. (#9877)
82e5b05 is described below
commit 82e5b0573efffd6f87ce4aead6a05b0c1d7381c7
Author: Samarth Jain <[email protected]>
AuthorDate: Mon May 18 11:17:28 2020 -0700
Number based columns representing time in custom format cannot be used as
timestamp column in Druid. (#9877)
* Number based columns representing time in custom format cannot be used as
timestamp column in Druid.
Prior to this fix, if an integer column in parquet is storing dateint in
format yyyyMMdd, it cannot be used as timestamp column in Druid as the
timestamp parser interprets it as a number storing UTC time instead of treating
it as a number representing time in yyyyMMdd format. Data formats like TSV or
CSV don't suffer from this problem as the timestamp is passed in as a string,
which the timestamp parser is able to parse correctly.
---
.../java/util/common/parsers/TimestampParser.java | 13 ++++++-
.../util/common/parsers/TimestampParserTest.java | 44 ++++++++++++++++++++++
2 files changed, 56 insertions(+), 1 deletion(-)
diff --git
a/core/src/main/java/org/apache/druid/java/util/common/parsers/TimestampParser.java
b/core/src/main/java/org/apache/druid/java/util/common/parsers/TimestampParser.java
index ecd06f8..e5a9520 100644
---
a/core/src/main/java/org/apache/druid/java/util/common/parsers/TimestampParser.java
+++
b/core/src/main/java/org/apache/druid/java/util/common/parsers/TimestampParser.java
@@ -122,11 +122,12 @@ public class TimestampParser
{
final Function<String, DateTime> stringFun = createTimestampParser(format);
final Function<Number, DateTime> numericFun =
createNumericTimestampParser(format);
+ final boolean isNumericFormat = isNumericFormat(format);
return o -> {
Preconditions.checkNotNull(o, "null timestamp");
- if (o instanceof Number) {
+ if (o instanceof Number && isNumericFormat) {
return numericFun.apply((Number) o);
} else {
return stringFun.apply(o.toString());
@@ -134,6 +135,16 @@ public class TimestampParser
};
}
+ private static boolean isNumericFormat(String format)
+ {
+ return "auto".equalsIgnoreCase(format)
+ || "millis".equalsIgnoreCase(format)
+ || "posix".equalsIgnoreCase(format)
+ || "micro".equalsIgnoreCase(format)
+ || "nano".equalsIgnoreCase(format)
+ || "ruby".equalsIgnoreCase(format);
+ }
+
private static DateTimeFormatter createAutoParser()
{
final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder()
diff --git
a/core/src/test/java/org/apache/druid/java/util/common/parsers/TimestampParserTest.java
b/core/src/test/java/org/apache/druid/java/util/common/parsers/TimestampParserTest.java
index 9c0e66e..a5622d5 100644
---
a/core/src/test/java/org/apache/druid/java/util/common/parsers/TimestampParserTest.java
+++
b/core/src/test/java/org/apache/druid/java/util/common/parsers/TimestampParserTest.java
@@ -181,4 +181,48 @@ public class TimestampParserTest
.getMillis()
);
}
+
+ @Test
+ public void testFormatsForNumberBasedTimestamp()
+ {
+ int yearMonthDate = 20200514;
+ DateTime expectedDt = DateTimes.of("2020-05-14T00:00:00.000Z");
+ Function<Object, DateTime> parser =
TimestampParser.createObjectTimestampParser("yyyyMMdd");
+ Assert.assertEquals("Timestamp of format yyyyMMdd not parsed correctly",
+ expectedDt, parser.apply(yearMonthDate));
+
+ int year = 2020;
+ expectedDt = DateTimes.of("2020-01-01T00:00:00.000Z");
+ parser = TimestampParser.createObjectTimestampParser("yyyy");
+ Assert.assertEquals("Timestamp of format yyyy not parsed correctly",
+ expectedDt, parser.apply(year));
+
+ int yearMonth = 202010;
+ expectedDt = DateTimes.of("2020-10-01T00:00:00.000Z");
+ parser = TimestampParser.createObjectTimestampParser("yyyyMM");
+ Assert.assertEquals("Timestamp of format yyyy not parsed correctly",
+ expectedDt, parser.apply(yearMonth));
+
+ // Friday, May 15, 2020 8:20:40 PM GMT
+ long millis = 1589574040000l;
+ expectedDt = DateTimes.of("2020-05-15T20:20:40.000Z");
+
+ parser = TimestampParser.createObjectTimestampParser("millis");
+ Assert.assertEquals("Timestamp of format millis not parsed correctly",
+ expectedDt, parser.apply(millis));
+ parser = TimestampParser.createObjectTimestampParser("auto");
+ Assert.assertEquals("Timestamp of format auto not parsed correctly",
+ expectedDt, parser.apply(millis));
+
+ int posix = 1589574040;
+ parser = TimestampParser.createObjectTimestampParser("posix");
+ Assert.assertEquals("Timestamp of format posix not parsed correctly",
+ expectedDt, parser.apply(posix));
+
+ long micro = 1589574040000000l;
+ parser = TimestampParser.createObjectTimestampParser("micro");
+ Assert.assertEquals("Timestamp of format micro not parsed correctly",
+ expectedDt, parser.apply(micro));
+
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]