This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit ed51dfdbdfc109db53b3c631e9f1631e9bb65c34 Author: Stamatis Zampetakis <zabe...@gmail.com> AuthorDate: Thu Aug 17 22:02:59 2023 +0300 HIVE-25576: Configurable datetime formatter for unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai Hemanth Gantasala) The two Java formatters present differences in their behavior leading to different query results. The supported patterns, between the two formatters, are also different, something that makes existing queries crash at runtime (after upgrade). Adapting to the new behavior of DateTimeFormatter is a challenging and time-consuming task for end users especially due to the widespread use of the aforementioned unixtime functions. Although DateTimeFormatter is a clear improvement over SimpleDateFormat some users still want to retain the old behavior for compatibility reasons thus introducing a property is necessary for facilitating migration. Overview of the change: 1. Add hive.datetime.formatter property to control formatter in unix_timestamp and from_unixtime functions. 2. Add UnixTimeFormatter class hierarchy for encapsulating formatting and parsing of unixtime based on the configuration. 3. Refactor unix_timestamp (+vectorized) and from_unixtime implementations to use the new formatter classes. 4. Add parameterized unit tests for the affected UDF implementations. The test cases are chosen in a way to highlight similarities and differences between the two available formatters and document the current behavior. A few interesting test cases are discussed in more detail below but not all of them. * Dates before 1800 in different timezones 1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;-5364683608 1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;-5364682200 The DATETIME and SIMPLE formatters use slightly different zone conversion rules so mapping 1800-01-01 00:00:00 Asia/Kolkata to seconds since epoch presents differences. 
* Invalid pattern and AM/PM timestamps 2023-07-21 09:13PM;yyyy-MM-dd HH:mma;Etc/GMT;SIMPLE;1689930780 The SIMPLE formatter returns a wrong result when an invalid pattern is used. The value 1689930780 actually corresponds to 2023-07-21 09:13AM (not PM as it was supposed to); it seems that 'HH' takes precedence over 'a'. The combined use of 'H' and 'a' is problematic. When using AM and PM the 'h' letter is the correct pattern letter. * Number of pattern letters Jul 9 2023;MMM dd yyyy;Etc/GMT;DATETIME;null Jul 9 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800 The SIMPLE formatter does not care how many times a pattern letter is used when parsing. For this reason, although the day appears as a single digit, for the SIMPLE formatter that is completely fine. The same does not hold for the DATETIME formatter. Closes #4615 --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 13 ++- .../hadoop/hive/conf/TestHiveConfVarsValidate.java | 6 ++ .../expressions/VectorUDFUnixTimeStampString.java | 27 ++--- .../ql/udf/generic/GenericUDFFromUnixTime.java | 35 ++---- .../ql/udf/generic/GenericUDFToUnixTimeStamp.java | 76 ++++--------- .../ql/udf/generic/UnixTimeDateTimeFormatter.java | 76 +++++++++++++ .../hive/ql/udf/generic/UnixTimeFormatter.java | 119 +++++++++++++++++++++ .../ql/udf/generic/UnixTimeFormatterCache.java | 50 +++++++++ .../udf/generic/UnixTimeSimpleDateFormatter.java | 69 ++++++++++++ .../TestVectorUDFUnixTimeStampString.java | 110 +++++++++++++++++++ .../TestGenericUDFFromUnixTimeEvaluate.java | 105 ++++++++++++++++++ ...ericUDFToUnixTimestampEvaluateStringString.java | 97 +++++++++++++++++ .../expressions/TestVectorUnixTimeStampString.csv | 36 +++++++ .../generic/TestGenericUDFFromUnixTimeEvaluate.csv | 48 +++++++++ ...nericUDFToUnixTimestampEvaluateStringString.csv | 54 ++++++++++ 15 files changed, 816 insertions(+), 105 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
index 108c545a384..14190915020 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3830,7 +3830,18 @@ public class HiveConf extends Configuration { HIVE_PRIVILEGE_SYNCHRONIZER_INTERVAL("hive.privilege.synchronizer.interval", "1800s", new TimeValidator(TimeUnit.SECONDS), "Interval to synchronize privileges from external authorizer periodically in HS2"), - + HIVE_DATETIME_FORMATTER("hive.datetime.formatter", "DATETIME", + new StringSet("DATETIME", "SIMPLE"), + "The formatter to use for handling datetime values. The possible values are:\n" + + " * DATETIME: For using java.time.format.DateTimeFormatter\n" + + " * SIMPLE: For using java.text.SimpleDateFormat (known bugs: HIVE-25458, HIVE-25403)\n" + + "Currently the configuration only affects the behavior of the following SQL functions:\n" + + " * unix_timestamp(string,[string])" + + " * from_unixtime\n\n" + + "The SIMPLE formatter exists purely for compatibility purposes with previous versions of Hive thus its use " + + "is discouraged. It suffers from known bugs that are unlikely to be fixed in subsequent versions of the product." 
+ + "Furthermore, using SIMPLE formatter may lead to strange behavior, and unexpected results when combined " + + "with SQL functions/operators that are using the new DATETIME formatter."), // HiveServer2 specific configs HIVE_SERVER2_CLEAR_DANGLING_SCRATCH_DIR("hive.server2.clear.dangling.scratchdir", false, "Clear dangling scratch dir periodically in HS2"), diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java index 4e9eb3245d8..7ac44588c08 100644 --- a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java +++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConfVarsValidate.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.List; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_DATETIME_FORMATTER; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_EXPLAIN_NODE_VISIT_LIMIT; import static org.junit.Assert.assertEquals; @@ -56,6 +57,11 @@ public class TestHiveConfVarsValidate { list.add(new Object[] { HIVE_EXPLAIN_NODE_VISIT_LIMIT, "1", null }); list.add(new Object[] { HIVE_EXPLAIN_NODE_VISIT_LIMIT, "14", null }); list.add(new Object[] { HIVE_EXPLAIN_NODE_VISIT_LIMIT, String.valueOf(Integer.MAX_VALUE), null }); + list.add(new Object[] { HIVE_DATETIME_FORMATTER, "DATETIME", null }); + list.add(new Object[] { HIVE_DATETIME_FORMATTER, "SIMPLE", null }); + list.add(new Object[] { HIVE_DATETIME_FORMATTER, "simple", null }); + list.add(new Object[] { HIVE_DATETIME_FORMATTER, "dateTime", null }); + list.add(new Object[] { HIVE_DATETIME_FORMATTER, "OTHER", "Invalid value.. 
expects one of [datetime, simple]" }); return list; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java index a2a3dc8ce84..278c392aaa1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java @@ -18,22 +18,13 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import java.time.ZoneId; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.Timestamp; -import org.apache.hadoop.hive.common.type.TimestampTZ; -import org.apache.hadoop.hive.common.type.TimestampTZUtil; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.UnixTimeFormatter; import org.apache.hadoop.io.Text; import java.nio.charset.CharacterCodingException; import java.text.ParseException; -import java.text.SimpleDateFormat; -import java.time.format.DateTimeFormatterBuilder; -import java.util.Date; -import java.util.Locale; -import java.util.TimeZone; /** * Return Unix Timestamp. 
@@ -43,7 +34,7 @@ public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldS private static final long serialVersionUID = 1L; - private transient ZoneId timeZone; + private transient UnixTimeFormatter formatter; public VectorUDFUnixTimeStampString(int colNum, int outputColumnNum) { super(colNum, outputColumnNum, -1, -1); @@ -56,22 +47,16 @@ public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldS @Override public void transientInit(Configuration conf) throws HiveException { super.transientInit(conf); - if (timeZone == null) { - String timeZoneStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE); - timeZone = TimestampTZUtil.parseTimeZone(timeZoneStr); + if (formatter == null) { + formatter = UnixTimeFormatter.ofConfiguration(conf); } } @Override protected long getField(byte[] bytes, int start, int length) throws ParseException { - try { - Timestamp timestamp = Timestamp.valueOf(Text.decode(bytes, start, length)); - TimestampTZ timestampTZ = TimestampTZUtil.convert(timestamp,timeZone); - return timestampTZ.getEpochSecond(); - } catch (CharacterCodingException e) { - throw new ParseException(e.getMessage(), 0); - } catch (IllegalArgumentException e){ + return formatter.parse(Text.decode(bytes, start, length)); + } catch (CharacterCodingException | RuntimeException e) { throw new ParseException(e.getMessage(), 0); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java index 21081cf7c11..99ca59749c6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java @@ -18,12 +18,6 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.time.Instant; -import java.time.ZoneId; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; - -import 
org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.MapredContext; @@ -52,10 +46,8 @@ public class GenericUDFFromUnixTime extends GenericUDF { private transient IntObjectInspector inputIntOI; private transient LongObjectInspector inputLongOI; - private transient ZoneId timeZone; private transient final Text result = new Text(); - private transient String lastFormat ="uuuu-MM-dd HH:mm:ss"; - private transient DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern(lastFormat); + private transient UnixTimeFormatter formatter; private transient Converter[] converters = new Converter[2]; private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[2]; @@ -84,20 +76,16 @@ public class GenericUDFFromUnixTime extends GenericUDF { checkArgGroups(arguments, 1, inputTypes, STRING_GROUP); obtainStringConverter(arguments, 1, inputTypes, converters); } - - if (timeZone == null) { - timeZone = SessionState.get() == null ? new HiveConf().getLocalTimeZone() : SessionState.get().getConf() - .getLocalTimeZone(); + if (formatter == null) { + formatter = UnixTimeFormatter.ofConfiguration(SessionState.get() == null ? new HiveConf() : SessionState.getSessionConf()); } - return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } @Override public void configure(MapredContext context) { if (context != null) { - String timeZoneStr = HiveConf.getVar(context.getJobConf(), HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE); - timeZone = TimestampTZUtil.parseTimeZone(timeZoneStr); + formatter = UnixTimeFormatter.ofConfiguration(context.getJobConf()); } } @@ -107,21 +95,16 @@ public class GenericUDFFromUnixTime extends GenericUDF { return null; } - if(arguments.length == 2) { + long unixTime = (inputIntOI != null) ? 
inputIntOI.get(arguments[0].get()) : inputLongOI.get(arguments[0].get()); + if (arguments.length == 2) { String format = getStringValue(arguments, 1, converters); if (format == null) { return null; } - if (!format.equals(lastFormat)) { - FORMATTER = DateTimeFormatter.ofPattern(format); - lastFormat = format; - } + result.set(formatter.format(unixTime, format)); + } else { + result.set(formatter.format(unixTime)); } - - long unixTime = (inputIntOI != null) ? inputIntOI.get(arguments[0].get()) : inputLongOI.get(arguments[0].get()); - Instant instant = Instant.ofEpochSecond(unixTime); - ZonedDateTime zonedDT = ZonedDateTime.ofInstant(instant, timeZone); - result.set(zonedDT.format(FORMATTER)); return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 5075ee1525e..788a83a88b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -18,15 +18,8 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.time.DateTimeException; -import java.time.LocalDate; import java.time.ZoneId; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeFormatterBuilder; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.hive.common.type.Timestamp; import org.apache.hadoop.hive.common.type.TimestampTZ; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; @@ -67,12 +60,9 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient TimestampLocalTZObjectInspector inputTimestampLocalTzOI; private transient Converter inputTextConverter; private transient Converter patternConverter; + private transient UnixTimeFormatter formatter; private transient ZoneId timeZone; - private 
transient String lasPattern = "uuuu-MM-dd HH:mm:ss"; - private transient DateTimeFormatter formatter; - - @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { initializeInput(arguments); @@ -91,6 +81,7 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { } } + HiveConf conf = SessionState.get() == null ? new HiveConf() : SessionState.getSessionConf(); PrimitiveObjectInspector arg1OI = (PrimitiveObjectInspector) arguments[0]; switch (arg1OI.getPrimitiveCategory()) { case CHAR: @@ -108,8 +99,10 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { patternConverter = ObjectInspectorConverters.getConverter(arg2OI, PrimitiveObjectInspectorFactory.javaStringObjectInspector); } + if (formatter == null) { + formatter = UnixTimeFormatter.ofConfiguration(conf); + } break; - case DATE: inputDateOI = (DateObjectInspector) arguments[0]; break; @@ -124,15 +117,15 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { + " takes only string/date/timestamp/timestampwltz types. Got Type:" + arg1OI .getPrimitiveCategory().name()); } - - timeZone = SessionState.get() == null ? 
new HiveConf().getLocalTimeZone() : SessionState.get().getConf() - .getLocalTimeZone(); - formatter = getFormatter(lasPattern); + if (timeZone == null) { + timeZone = conf.getLocalTimeZone(); + } } @Override public void configure(MapredContext context) { if (context != null) { + formatter = UnixTimeFormatter.ofConfiguration(context.getJobConf()); String timeZoneStr = HiveConf.getVar(context.getJobConf(), HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE); timeZone = TimestampTZUtil.parseTimeZone(timeZoneStr); } @@ -149,48 +142,23 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { if (arguments[0].get() == null) { return null; } - if (inputTextConverter != null) { - Timestamp timestamp; String textVal = (String) inputTextConverter.convert(arguments[0].get()); if (textVal == null) { return null; } - - if (patternConverter != null) { - if (arguments[1].get() == null) { - return null; - } - String patternVal = (String) patternConverter.convert(arguments[1].get()); - if (patternVal == null) { - return null; - } - if (!patternVal.equals(lasPattern)) { - formatter = getFormatter(patternVal); - lasPattern = patternVal; - } - - try { - ZonedDateTime zonedDateTime = ZonedDateTime.parse(textVal, formatter.withZone(timeZone)).withZoneSameInstant(timeZone); - timestamp = new Timestamp(zonedDateTime.toLocalDateTime()); - } catch (DateTimeException e1) { - try { - LocalDate localDate = LocalDate.parse(textVal, formatter); - timestamp = new Timestamp(localDate.atStartOfDay()); - } catch (DateTimeException e3) { - return null; - } - } - } else { - try { - timestamp = Timestamp.valueOf(textVal); - } catch (IllegalArgumentException e) { - return null; + try { + final long epochSeconds; + if (patternConverter == null) { + epochSeconds = formatter.parse(textVal); + } else { + epochSeconds = formatter.parse(textVal, (String) patternConverter.convert(arguments[1].get())); } + retValue.set(epochSeconds); + return retValue; + } catch (RuntimeException e) { + return null; } - - TimestampTZ 
timestampTZ = TimestampTZUtil.convert(timestamp, timeZone); - retValue.set(timestampTZ.getEpochSecond()); } else if (inputDateOI != null) { TimestampTZ timestampTZ = TimestampTZUtil.convert( inputDateOI.getPrimitiveJavaObject(arguments[0].get()), timeZone); @@ -213,10 +181,4 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { return getStandardDisplayString(getName(),children); } - public DateTimeFormatter getFormatter(String pattern){ - return new DateTimeFormatterBuilder() - .parseCaseInsensitive() - .appendPattern(pattern) - .toFormatter(); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java new file mode 100644 index 00000000000..b45a595e2cc --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZ; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; + +import java.time.DateTimeException; +import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; +import java.util.Objects; + +final class UnixTimeDateTimeFormatter extends UnixTimeFormatterCache<DateTimeFormatter> { + + UnixTimeDateTimeFormatter(final ZoneId zoneId) { + super(zoneId, + s -> new DateTimeFormatterBuilder().parseCaseInsensitive().appendPattern(s).toFormatter().withZone(zoneId)); + } + + @Override + public long parse(String text) throws RuntimeException { + Objects.requireNonNull(text); + Timestamp timestamp = Timestamp.valueOf(text); + TimestampTZ timestampTZ = TimestampTZUtil.convert(timestamp, zoneId); + return timestampTZ.getEpochSecond(); + } + + @Override + public long parse(String text, String pattern) { + Objects.requireNonNull(text); + Objects.requireNonNull(pattern); + Timestamp timestamp; + DateTimeFormatter formatter = getFormatter(pattern); + try { + ZonedDateTime zonedDateTime = ZonedDateTime.parse(text, formatter).withZoneSameInstant(zoneId); + timestamp = new Timestamp(zonedDateTime.toLocalDateTime()); + } catch (DateTimeException e1) { + LocalDate localDate = LocalDate.parse(text, formatter); + timestamp = new Timestamp(localDate.atStartOfDay()); + } + TimestampTZ timestampTZ = TimestampTZUtil.convert(timestamp, zoneId); + return timestampTZ.getEpochSecond(); + } + + @Override + public String format(final long epochSeconds) { + return format(epochSeconds, "uuuu-MM-dd HH:mm:ss"); + } + + @Override + public String format(final long epochSeconds, final String pattern) { + DateTimeFormatter formatter = getFormatter(pattern); + Instant instant = 
Instant.ofEpochSecond(epochSeconds); + ZonedDateTime zonedDT = ZonedDateTime.ofInstant(instant, zoneId); + return zonedDT.format(formatter); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java new file mode 100644 index 00000000000..de4599006e2 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; +import org.apache.hadoop.hive.conf.HiveConf; + +import java.time.ZoneId; + +/** + * Formatter for parsing and printing unixtime objects (long numbers representing seconds since epoch). + * <p> + * This interface provides the main entry point for print and parsing and provides factories for the + * available implementations of {@code UnixTimeFormatter}. + * </p> + * <p> + * The patterns that are supported and their behavior depend on the underlying implementation of the interface. 
+ * </p> + * <p> + * Implementations of the interface are not meant to be thread safe. + * </p> + */ +public interface UnixTimeFormatter { + + /** + * Types for the built-in formatter implementations. + */ + enum Type { + /** + * A formatter that supports the same patterns with {@link java.text.SimpleDateFormat}. + */ + SIMPLE { + @Override + UnixTimeFormatter newFormatter(ZoneId zone) { + return new UnixTimeSimpleDateFormatter(zone); + } + }, + /** + * A formatter that supports the same patterns with {@link java.time.format.DateTimeFormatter}. + */ + DATETIME { + @Override + UnixTimeFormatter newFormatter(ZoneId zone) { + return new UnixTimeDateTimeFormatter(zone); + } + }; + /** + * Creates a new formatter with the specified zone id. + * @param zone - the zone id + * @return a new formatter with the specified zone id. + */ + abstract UnixTimeFormatter newFormatter(ZoneId zone); + } + + /** + * Creates a formatter using the specified configuration. + * + * @param conf the configuration to use, not null + * @return the formatter based on the provided configuration, not null. + */ + static UnixTimeFormatter ofConfiguration(Configuration conf) { + ZoneId zoneId = TimestampTZUtil.parseTimeZone(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE)); + Type type = Type.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DATETIME_FORMATTER).toUpperCase()); + return type.newFormatter(zoneId); + } + + /** + * Parses the input text and converts it to seconds since epoch. + * @param text the text to parse, not null + * @return a long number representing the number of seconds since epoch. + * @throws RuntimeException if unable to parse the requested text using the default behavior. + */ + long parse(String text) throws RuntimeException; + + /** + * Parses the input text and converts it to seconds since epoch using the specified pattern. 
+ * @param text the text to parse, not null + * @param pattern the pattern to use to parse the text and resolve it to seconds since epoch + * @return a long number representing the number of seconds since epoch. + * @throws RuntimeException if unable to parse the requested text using the specified pattern. + */ + long parse(String text, String pattern) throws RuntimeException; + + /** + * Formats the specified number of seconds since epoch using the formatters default pattern. + * <p> + * This formats the unixtime to a String using the rules of the underlying formatter. + * </p> + * @param epochSeconds the number of seconds to format + * @return the formatted string, not null + */ + String format(long epochSeconds); + + /** + * Formats the specified number of seconds since epoch using the specified pattern. + * <p> + * This formats the unixtime to a String using specified pattern and the rules of the underlying formatter. + * </p> + * @param epochSeconds the number of seconds to format + * @param pattern the pattern to use for formatting + * @return the formatted string, not null + */ + String format(long epochSeconds, String pattern); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java new file mode 100644 index 00000000000..7c8a6479230 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.time.ZoneId; +import java.util.Objects; +import java.util.function.Function; + +/** + * Formatter that supports caching of patterns to avoid compilation overhead. + * <p> + * At its current state, the cache is very simplistic and just holds the last used pattern in memory. + * </p> + * @param <T> the type of the underlying datetime formatter + */ +abstract class UnixTimeFormatterCache<T> implements UnixTimeFormatter { + + protected final ZoneId zoneId; + protected final Function<String, T> loader; + protected String lastPattern; + protected T formatter; + + protected UnixTimeFormatterCache(ZoneId zoneId, Function<String, T> loader) { + this.zoneId = zoneId; + this.loader = loader; + } + + protected final T getFormatter(String pattern) { + Objects.requireNonNull(pattern); + if (!pattern.equals(lastPattern)) { + lastPattern = pattern; + formatter = loader.apply(pattern); + } + return formatter; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java new file mode 100644 index 00000000000..faffa5819f3 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.time.ZoneId; +import java.time.format.DateTimeParseException; +import java.util.Date; +import java.util.Objects; +import java.util.TimeZone; + +final class UnixTimeSimpleDateFormatter extends UnixTimeFormatterCache<SimpleDateFormat> { + private static final String DEFAULT = "yyyy-MM-dd HH:mm:ss"; + + UnixTimeSimpleDateFormatter(final ZoneId zoneId) { + super(zoneId, s -> { + SimpleDateFormat f = new SimpleDateFormat(s); + f.setTimeZone(TimeZone.getTimeZone(zoneId)); + return f; + }); + } + + @Override + public long parse(final String value) throws RuntimeException { + return parse(value, DEFAULT); + } + + @Override + public long parse(String text, String pattern) { + Objects.requireNonNull(text); + Objects.requireNonNull(pattern); + final SimpleDateFormat formatter = getFormatter(pattern); + ParsePosition pos = new ParsePosition(0); + Date d = formatter.parse(text, pos); + if (d == null) { + throw new DateTimeParseException(text + " cannot be parsed to date. 
Error at index " + pos.getErrorIndex(), text, + pos.getErrorIndex()); + } + return d.getTime() / 1000; + } + + @Override + public String format(final long epochSeconds) { + return format(epochSeconds, DEFAULT); + } + + @Override + public String format(final long epochSeconds, final String pattern) { + SimpleDateFormat formatter = getFormatter(pattern); + // Convert epochSeconds to milliseconds + Date date = new Date(epochSeconds * 1000L); + return formatter.format(date); + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFUnixTimeStampString.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFUnixTimeStampString.java new file mode 100644 index 00000000000..9dd79d44e92 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFUnixTimeStampString.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; + +import com.opencsv.CSVParser; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvException; + +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.nio.charset.CharacterCodingException; +import java.util.Collection; +import java.util.Objects; + +@RunWith(Parameterized.class) +public class TestVectorUDFUnixTimeStampString { + + private final String value; + private final String zone; + private final String formatter; + private final Long expectedResult; + + public TestVectorUDFUnixTimeStampString(String value, String zone, String formatter, String expectedResult) { + this.value = value; + this.zone = zone; + this.formatter = formatter; + this.expectedResult = expectedResult.equals("null") ? 
null : Long.parseLong(expectedResult); + } + + @Parameterized.Parameters(name = "('{0}'), zone={1}, parserLegacy={2}") + public static Collection<String[]> readInputs() throws IOException, CsvException { + CSVParser parser = new CSVParserBuilder().withSeparator(';').withIgnoreQuotations(true).build(); + try (InputStream in = TestVectorUDFUnixTimeStampString.class.getResourceAsStream( + "TestVectorUnixTimeStampString.csv")) { + Objects.requireNonNull(in); + try (CSVReader reader = new CSVReaderBuilder(new InputStreamReader(in)).withCSVParser(parser).build()) { + return reader.readAll(); + } + } + } + + @Test + public void testEvaluate() throws HiveException, InterruptedException, CharacterCodingException { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.HIVE_DATETIME_FORMATTER, formatter); + conf.setVar(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE, zone); + SessionState state = SessionState.start(conf); + VectorUDFUnixTimeStampString udf = new VectorUDFUnixTimeStampString(0, 1); + udf.setInputTypeInfos(TypeInfoFactory.stringTypeInfo); + udf.transientInit(conf); + VectorizedRowBatch batch = singleElementRowBatch(value); + udf.evaluate(batch); + LongColumnVector result = (LongColumnVector) batch.cols[1]; + if (expectedResult == null) { + Assert.assertTrue(udfDisplayWithInputs(), result.isNull[0]); + } else { + Assert.assertEquals(udfDisplayWithInputs(), expectedResult.longValue(), result.vector[0]); + } + SessionState.endStart(state); + } + + private String udfDisplayWithInputs() { + return "unix_timestamp(" + value + ") sessionZone=" + zone + ", legacy=" + formatter; + } + + private static VectorizedRowBatch singleElementRowBatch(String e) throws CharacterCodingException { + BytesColumnVector bcv = new BytesColumnVector(); + byte[] encoded = Text.encode(e).array(); + bcv.vector[0] = encoded; + bcv.start[0] = 0; + bcv.length[0] = encoded.length; + + VectorizedRowBatch batch = new VectorizedRowBatch(2); + batch.cols[0] = bcv; + batch.cols[1] = new 
LongColumnVector(); + batch.size = 1; + return batch; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.java new file mode 100644 index 00000000000..3b6f1eedd13 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +import com.opencsv.CSVParser; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvException; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.Objects; + +import static org.junit.Assert.assertEquals; + +/** + * Tests for {@link GenericUDFFromUnixTime#evaluate(DeferredObject[])} with long value pattern inputs. 
+ */ +@RunWith(Parameterized.class) +public class TestGenericUDFFromUnixTimeEvaluate { + private final GenericUDFFromUnixTime udf = new GenericUDFFromUnixTime(); + private final ObjectInspector[] argInspectors = + new ObjectInspector[] { PrimitiveObjectInspectorFactory.writableLongObjectInspector, + PrimitiveObjectInspectorFactory.writableStringObjectInspector }; + private final String expectedResult; + private final String pattern; + private final String zone; + private final String formatter; + private final long value; + + public TestGenericUDFFromUnixTimeEvaluate(String expectedResult, String pattern, String zone, String formatter, + String value) { + this.value = Long.parseLong(value); + this.pattern = pattern; + this.zone = zone; + this.formatter = formatter; + this.expectedResult = expectedResult; + } + + @Parameterized.Parameters(name = "('{0}','{1}'), zone={2}, formatter={3}") + public static Collection<String[]> readInputs() throws IOException, CsvException { + CSVParser parser = new CSVParserBuilder().withSeparator(';').withIgnoreQuotations(true).build(); + try (InputStream in = TestGenericUDFFromUnixTimeEvaluate.class.getResourceAsStream( + "TestGenericUDFFromUnixTimeEvaluate.csv")) { + Objects.requireNonNull(in); + try (CSVReader reader = new CSVReaderBuilder(new InputStreamReader(in)).withCSVParser(parser).build()) { + return reader.readAll(); + } + } + } + + @Test + public void testEvaluate() throws HiveException, InterruptedException { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.HIVE_DATETIME_FORMATTER, formatter); + conf.setVar(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE, zone); + SessionState state = SessionState.start(conf); + udf.initialize(argInspectors); + try { + Text result = (Text) udf.evaluate(new DeferredObject[] { new DeferredJavaObject(new LongWritable(value)), new DeferredJavaObject(new Text(pattern)) }); + assertEquals(udfDisplayWithInputs(), expectedResult, result.toString()); + }catch (RuntimeException e) { + 
assertEquals(udfDisplayWithInputs(), expectedResult, e.getMessage()); + } + SessionState.endStart(state); + } + + private String udfDisplayWithInputs() { + return udf.getDisplayString(new String[] { Long.toString(value), pattern }) + " sessionZone=" + zone + ", formatter=" + + formatter; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.java new file mode 100644 index 00000000000..e51b3910458 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; + +import com.opencsv.CSVParser; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvException; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Arrays; +import java.util.Collection; + +import static org.junit.Assert.assertEquals; + +/** + * Tests for {@link GenericUDFToUnixTimeStamp#evaluate(DeferredObject[])} with string value and pattern inputs. + */ +@RunWith(Parameterized.class) +public class TestGenericUDFToUnixTimestampEvaluateStringString { + private final GenericUDFToUnixTimeStamp udf = new GenericUDFToUnixTimeStamp(); + private final ObjectInspector[] argInspectors = new ObjectInspector[2]; + private final String value; + private final String pattern; + private final String zone; + private final String formatter; + private final LongWritable expectedResult; + + public TestGenericUDFToUnixTimestampEvaluateStringString(String value, String pattern, String zone, String formatter, + String expectedResult) { + this.value = value; + this.pattern = pattern; + this.zone = zone; + this.formatter = formatter; + this.expectedResult = expectedResult.equals("null") ? 
null : new LongWritable(Long.parseLong(expectedResult)); + Arrays.fill(argInspectors, PrimitiveObjectInspectorFactory.writableStringObjectInspector); + } + + @Parameterized.Parameters(name = "('{0}','{1}'), zone={2}, parserLegacy={3}") + public static Collection<String[]> readInputs() throws IOException, CsvException { + CSVParser parser = new CSVParserBuilder().withSeparator(';').withIgnoreQuotations(true).build(); + try (CSVReader reader = new CSVReaderBuilder(new InputStreamReader( + TestGenericUDFToUnixTimestampEvaluateStringString.class.getResourceAsStream( + "TestGenericUDFToUnixTimestampEvaluateStringString.csv"))).withCSVParser(parser).build()) { + return reader.readAll(); + } + } + + @Test + public void testEvaluate() throws HiveException, InterruptedException { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.HIVE_DATETIME_FORMATTER, formatter); + conf.setVar(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE, zone); + SessionState state = SessionState.start(conf); + udf.initialize(argInspectors); + LongWritable result = (LongWritable) udf.evaluate( + new DeferredObject[] { new DeferredJavaObject(new Text(value)), new DeferredJavaObject(new Text(pattern)) }); + assertEquals(udfDisplayWithInputs(), expectedResult, result); + SessionState.endStart(state); + } + + private String udfDisplayWithInputs() { + return udf.getDisplayString(new String[] { value, pattern }) + " sessionZone=" + zone + ", formatter=" + formatter; + } +} diff --git a/ql/src/test/resources/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUnixTimeStampString.csv b/ql/src/test/resources/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUnixTimeStampString.csv new file mode 100644 index 00000000000..aed76acd807 --- /dev/null +++ b/ql/src/test/resources/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUnixTimeStampString.csv @@ -0,0 +1,36 @@ +1970-01-01 00:00:00;Etc/GMT;DATETIME;0 +1970-01-01 00:00:00;Etc/GMT;SIMPLE;0 +1970-01-01 
00:00:00;Atlantic/Azores;DATETIME;3600 +1970-01-01 00:00:00;Atlantic/Azores;SIMPLE;3600 +1970-01-01 00:00:00;Europe/Paris;DATETIME;-3600 +1970-01-01 00:00:00;Europe/Paris;SIMPLE;-3600 +1970-01-01 00:00:00 GMT;Etc/GMT;DATETIME;null +1970-01-01 00:00:00 GMT;Etc/GMT;SIMPLE;0 +1970-01-01 00:00:00 GMT-01:00;Etc/GMT;DATETIME;null +1970-01-01 00:00:00 GMT-01:00;Etc/GMT;SIMPLE;0 +1970-01-01 00:00:00 GMT+01:00;Etc/GMT;DATETIME;null +1970-01-01 00:00:00 GMT+01:00;Etc/GMT;SIMPLE;0 +1800-01-01 00:00:00;Etc/GMT;DATETIME;-5364662400 +1800-01-01 00:00:00;Etc/GMT;SIMPLE;-5364662400 +1800-01-01 00:00:00;Asia/Kolkata;DATETIME;-5364683608 +1800-01-01 00:00:00;Asia/Kolkata;SIMPLE;-5364682200 +Jul 9 2023;Etc/GMT;DATETIME;null +Jul 9 2023;Etc/GMT;SIMPLE;null +Jul 09 2023;Etc/GMT;DATETIME;null +Jul 09 2023;Etc/GMT;SIMPLE;null +Jul 21 2023;Etc/GMT;DATETIME;null +Jul 21 2023;Etc/GMT;SIMPLE;null +Jul 21 2023 09:13;Etc/GMT;DATETIME;null +Jul 21 2023 09:13;Etc/GMT;SIMPLE;null +Jul 21 2023 9:13;Etc/GMT;DATETIME;null +Jul 21 2023 9:13;Etc/GMT;SIMPLE;null +2023-07-21 09:13;Etc/GMT;DATETIME;null +2023-07-21 09:13;Etc/GMT;SIMPLE;null +2023-07-21 9:13;Etc/GMT;DATETIME;null +2023-07-21 9:13;Etc/GMT;SIMPLE;null +2023-07-21 09:13AM;Etc/GMT;DATETIME;null +2023-07-21 09:13AM;Etc/GMT;SIMPLE;null +2023-07-21 09:13:10;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10;Etc/GMT;SIMPLE;1689930790 +2023-07-21 09:13:10.123;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10.123;Etc/GMT;SIMPLE;1689930790 diff --git a/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.csv b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.csv new file mode 100644 index 00000000000..8198c33eee6 --- /dev/null +++ b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.csv @@ -0,0 +1,48 @@ +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;0 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;0 
+1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;DATETIME;3600 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;SIMPLE;3600 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Europe/Paris;DATETIME;-3600 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Europe/Paris;SIMPLE;-3600 +1970-01-01 00:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;0 +1970-01-01 00:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;0 +1970-01-01 01:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;3600 +1970-01-01 01:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;3600 +1969-12-31 23:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;-3600 +1969-12-31 23:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;-3600 +1970-01-01 02:00:00 CET;yyyy-MM-dd HH:mm:ss z;Europe/Paris;DATETIME;3600 +1970-01-01 02:00:00 CET;yyyy-MM-dd HH:mm:ss z;Europe/Paris;SIMPLE;3600 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;-5364662400 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;-5364662400 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;-5364683608 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;-5364682200 +Jul 9 2023;MMM d yyyy;Etc/GMT;DATETIME;1688860800 +Jul 9 2023;MMM d yyyy;Etc/GMT;SIMPLE;1688860800 +Jul 09 2023;MMM dd yyyy;Etc/GMT;DATETIME;1688860800 +Jul 09 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800 +Jul 21 2023;MMM dd yyyy;Etc/GMT;DATETIME;1689897600 +Jul 21 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1689897600 +Field DayOfYear cannot be printed as the value 202 exceeds the maximum print width of 2;YYYY-MM-DD;Etc/GMT;DATETIME;1689897600 +2023-07-202;YYYY-MM-DD;Etc/GMT;SIMPLE;1689897600 +Jul 21 2023 09:13;MMM dd yyyy HH:mm;Etc/GMT;DATETIME;1689930780 +Jul 21 2023 09:13;MMM dd yyyy HH:mm;Etc/GMT;SIMPLE;1689930780 +Jul 21 2023 9:13;MMM dd yyyy H:mm;Etc/GMT;DATETIME;1689930780 +Jul 21 2023 9:13;MMM dd yyyy H:mm;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13;yyyy-MM-dd HH:mm;Etc/GMT;DATETIME;1689930780 +2023-07-21 09:13;yyyy-MM-dd HH:mm;Etc/GMT;SIMPLE;1689930780 +2023-07-21 
9:13;yyyy-MM-dd H:mm;Etc/GMT;DATETIME;1689930780 +2023-07-21 9:13;yyyy-MM-dd H:mm;Etc/GMT;SIMPLE;1689930780 +Field MilliOfDay cannot be printed as the value 33180000 exceeds the maximum print width of 2;yyyy-MM-dd HH:mmAA;Etc/GMT;DATETIME;1689930780 +Illegal pattern character 'A';yyyy-MM-dd HH:mmAA;Etc/GMT;SIMPLE;1689930780 +Too many pattern letters: a;yyyy-MM-dd HH:mmaa;Etc/GMT;DATETIME;1689930780 +2023-07-21 09:13AM;yyyy-MM-dd HH:mmaa;Etc/GMT;SIMPLE;1689930780 +Too many pattern letters: a;yyyy-MM-dd hh:mmaa;Etc/GMT;DATETIME;1689930780 +2023-07-21 09:13AM;yyyy-MM-dd hh:mmaa;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13PM;yyyy-MM-dd hh:mma;Etc/GMT;DATETIME;1689973980 +2023-07-21 09:13PM;yyyy-MM-dd hh:mma;Etc/GMT;SIMPLE;1689973980 +2023-07-21 09:13:10;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;1689930790 +Too many pattern letters: s;yyyy-MM-dd HH:mm:ss.sss;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10.010;yyyy-MM-dd HH:mm:ss.sss;Etc/GMT;SIMPLE;1689930790 +2023-07-21 09:13:10.000;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10.000;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;SIMPLE;1689930790 diff --git a/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.csv b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.csv new file mode 100644 index 00000000000..ff4ee0725df --- /dev/null +++ b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.csv @@ -0,0 +1,54 @@ +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;0 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;0 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;DATETIME;3600 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;SIMPLE;3600 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Europe/Paris;DATETIME;-3600 +1970-01-01 00:00:00;yyyy-MM-dd 
HH:mm:ss;Europe/Paris;SIMPLE;-3600 +1970-01-01 00:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;0 +1970-01-01 00:00:00 GMT;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;0 +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;3600 +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;3600 +1970-01-01 00:00:00 GMT+01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;-3600 +1970-01-01 00:00:00 GMT+01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;-3600 +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Europe/Paris;DATETIME;3600 +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Europe/Paris;SIMPLE;3600 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;-5364662400 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;-5364662400 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;-5364683608 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;-5364682200 +Jul 9 2023;MMM dd yyyy;Etc/GMT;DATETIME;null +Jul 9 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800 +Jul 09 2023;MMM dd yyyy;Etc/GMT;DATETIME;1688860800 +Jul 09 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800 +Jul 21 2023;MMM dd yyyy;Etc/GMT;DATETIME;1689897600 +Jul 21 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1689897600 +2023-07-21;YYYY-MM-DD;Etc/GMT;DATETIME;null +2023-07-21;YYYY-MM-DD;Etc/GMT;SIMPLE;1672531200 +Jul 21 2023 09:13;MMM dd yyyy HH:mm;Etc/GMT;DATETIME;1689930780 +Jul 21 2023 09:13;MMM dd yyyy HH:mm;Etc/GMT;SIMPLE;1689930780 +Jul 21 2023 9:13;MMM dd yyyy HH:mm;Etc/GMT;DATETIME;null +Jul 21 2023 9:13;MMM dd yyyy HH:mm;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13;yyyy-MM-dd HH:mm;Etc/GMT;DATETIME;1689930780 +2023-07-21 09:13;yyyy-MM-dd HH:mm;Etc/GMT;SIMPLE;1689930780 +2023-07-21 9:13;yyyy-MM-dd HH:mm;Etc/GMT;DATETIME;null +2023-07-21 9:13;yyyy-MM-dd HH:mm;Etc/GMT;SIMPLE;1689930780 +2023-07-21 9:13PM;yyyy-MM-dd h:mma;Etc/GMT;DATETIME;1689973980 +2023-07-21 9:13PM;yyyy-MM-dd h:mma;Etc/GMT;SIMPLE;1689973980 +2023-07-21 09:13AM;yyyy-MM-dd HH:mmAA;Etc/GMT;DATETIME;null +2023-07-21 
09:13AM;yyyy-MM-dd HH:mmAA;Etc/GMT;SIMPLE;null +2023-07-21 09:13AM;yyyy-MM-dd HH:mmaa;Etc/GMT;DATETIME;null +2023-07-21 09:13AM;yyyy-MM-dd HH:mmaa;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13AM;yyyy-MM-dd HH:mma;Etc/GMT;DATETIME;1689930780 +2023-07-21 09:13AM;yyyy-MM-dd HH:mma;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13PM;yyyy-MM-dd HH:mma;Etc/GMT;DATETIME;null +2023-07-21 09:13PM;yyyy-MM-dd HH:mma;Etc/GMT;SIMPLE;1689930780 +2023-07-21 09:13PM;yyyy-MM-dd hh:mmaa;Etc/GMT;DATETIME;null +2023-07-21 09:13PM;yyyy-MM-dd hh:mmaa;Etc/GMT;SIMPLE;1689973980 +2023-07-21 09:13PM;yyyy-MM-dd hh:mma;Etc/GMT;DATETIME;1689973980 +2023-07-21 09:13PM;yyyy-MM-dd hh:mma;Etc/GMT;SIMPLE;1689973980 +2023-07-21 09:13:10;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;1689930790 +2023-07-21 09:13:10.123;yyyy-MM-dd HH:mm:ss.sss;Etc/GMT;DATETIME;null +2023-07-21 09:13:10.123;yyyy-MM-dd HH:mm:ss.sss;Etc/GMT;SIMPLE;1689930903 +2023-07-21 09:13:10.123;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;DATETIME;1689930790 +2023-07-21 09:13:10.123;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;SIMPLE;1689930790