This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 16a39b4fe77 HIVE-27673: Configurable datetime formatter for date_format (Stamatis Zampetakis reviewed by John Sherman, Aman Sinha, Ayush Saxena) 16a39b4fe77 is described below commit 16a39b4fe77be3f204ddcffac607385f683ed129 Author: Stamatis Zampetakis <zabe...@gmail.com> AuthorDate: Fri Sep 8 12:20:37 2023 +0200 HIVE-27673: Configurable datetime formatter for date_format (Stamatis Zampetakis reviewed by John Sherman, Aman Sinha, Ayush Saxena) SimpleDateFormat and DateTimeFormatter present differences in their behavior leading to different query results when date_format is used (after upgrade). To avoid sudden changes in query results and allow users to gradually migrate their query workloads to use the new patterns, the `hive.datetime.formatter` property can now be used to select which formatter will be used in date_format. Overview of the change: 1. Generalize the UnixTimeFormatter hierarchy so that it works with Instant, which can represent more than epochSeconds, to be used by the GenericUDFDateFormat class. 2. Slightly adapt other classes using the UnixTimeFormatter to pass or retrieve the necessary values from/to Instants. 3. Refactor GenericUDFDateFormat to use the InstantFormatter so that its behavior becomes configurable via the `hive.datetime.formatter` property. 4. Add configure method in GenericUDFDateFormat to allow propagation of session-level configurations (formatter/zone) to the runtime (Tez, MR, etc.). At runtime there is no SessionState so the only way to configure date_format is using the respective method and global configurations in hive-site.xml, etc. 5. Extend the use of `hive.datetime.formatter` to date_format and update the description in HiveConf mentioning also the bugs that affect the SIMPLE formatter. 6.
Add unit tests for date_format for both formatters covering: * Reference dates (1970-01-01) and trivial patterns * Invalid date inputs (Jul 9 2023) that cannot be parsed * Timestamp inputs with timezone specification (1970-01-01 00:00:00 GMT-01:00) that is ignored/dropped silently * Current date (2023-07-21 09:13:10.123456789) with nano precision and pattern variations * Patterns ('u','SSSSSSSSS') with different behavior between formatters * Gregorian dates (1800-01-01 00:00:00) before 1900 and different timezones * Julian dates (1000-01-01 00:00:00) and different timezones 7. Add end-to-end tests for ensuring that configuration property takes effect with (in HS2)/without (in Tez) task conversion. Essentially the unit tests also demonstrate existing bugs when the SIMPLE formatter is used affecting Gregorian dates before 1900 and Julian dates. Closes #4675 --- .../hadoop/hive/common/type/TimestampTZ.java | 3 + .../java/org/apache/hadoop/hive/conf/HiveConf.java | 7 +- .../expressions/VectorUDFUnixTimeStampString.java | 8 +- .../hive/ql/udf/generic/GenericUDFDateFormat.java | 57 +++++------ .../ql/udf/generic/GenericUDFFromUnixTime.java | 13 ++- .../ql/udf/generic/GenericUDFToUnixTimeStamp.java | 10 +- ...ormatter.java => InstantDateTimeFormatter.java} | 20 ++-- ...nixTimeFormatter.java => InstantFormatter.java} | 49 +++++----- ...matterCache.java => InstantFormatterCache.java} | 4 +- ...matter.java => InstantSimpleDateFormatter.java} | 20 ++-- .../generic/TestGenericUDFDateFormatEvaluate.java | 104 +++++++++++++++++++++ .../test/queries/clientpositive/udf_date_format.q | 1 + .../clientpositive/udf_date_format_nofetchtask.q | 26 ++++++ ...{udf_date_format.q => udf_date_format_simple.q} | 4 + .../generic/TestGenericUDFDateFormatEvaluate.csv | 40 ++++++++ .../clientpositive/llap/udf_date_format.q.out | 2 +- .../llap/udf_date_format_nofetchtask.q.out | 83 ++++++++++++++++ ...e_format.q.out => udf_date_format_simple.q.out} | 18 ++-- 18 files changed, 369 insertions(+), 
100 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java index af181ba124e..3ff06e0eead 100644 --- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java +++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java @@ -100,4 +100,7 @@ public class TimestampTZ implements Comparable<TimestampTZ> { return zonedDateTime.toInstant().getNano(); } + public Instant toInstant() { + return zonedDateTime.toInstant(); + } } diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a76b64f87f5..10b8a34b76e 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3839,10 +3839,11 @@ public class HiveConf extends Configuration { new StringSet("DATETIME", "SIMPLE"), "The formatter to use for handling datetime values. The possible values are:\n" + " * DATETIME: For using java.time.format.DateTimeFormatter\n" + - " * SIMPLE: For using java.text.SimpleDateFormat (known bugs: HIVE-25458, HIVE-25403)\n" + + " * SIMPLE: For using java.text.SimpleDateFormat (known bugs: HIVE-25458, HIVE-25403, HIVE-25268)\n" + "Currently the configuration only affects the behavior of the following SQL functions:\n" + - " * unix_timestamp(string,[string])" + - " * from_unixtime\n\n" + + " * unix_timestamp(string,[string])\n" + + " * from_unixtime\n" + + " * date_format\n\n" + "The SIMPLE formatter exists purely for compatibility purposes with previous versions of Hive thus its use " + "is discouraged. It suffers from known bugs that are unlikely to be fixed in subsequent versions of the product." 
+ "Furthermore, using SIMPLE formatter may lead to strange behavior, and unexpected results when combined " + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java index 278c392aaa1..93cb360fce2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFUnixTimeStampString.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.udf.generic.UnixTimeFormatter; +import org.apache.hadoop.hive.ql.udf.generic.InstantFormatter; import org.apache.hadoop.io.Text; import java.nio.charset.CharacterCodingException; @@ -34,7 +34,7 @@ public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldS private static final long serialVersionUID = 1L; - private transient UnixTimeFormatter formatter; + private transient InstantFormatter formatter; public VectorUDFUnixTimeStampString(int colNum, int outputColumnNum) { super(colNum, outputColumnNum, -1, -1); @@ -48,14 +48,14 @@ public final class VectorUDFUnixTimeStampString extends VectorUDFTimestampFieldS public void transientInit(Configuration conf) throws HiveException { super.transientInit(conf); if (formatter == null) { - formatter = UnixTimeFormatter.ofConfiguration(conf); + formatter = InstantFormatter.ofConfiguration(conf); } } @Override protected long getField(byte[] bytes, int start, int length) throws ParseException { try { - return formatter.parse(Text.decode(bytes, start, length)); + return formatter.parse(Text.decode(bytes, start, length)).getEpochSecond(); } catch (CharacterCodingException | RuntimeException e) { throw new ParseException(e.getMessage(), 0); } diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java index ead43d225ef..9113817f856 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateFormat.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.udf.generic; import org.apache.hadoop.hive.common.type.Timestamp; +import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -31,11 +33,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.Pr import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; -import java.time.Instant; import java.time.ZoneId; -import java.time.ZoneOffset; -import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.DATE_GROUP; import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; @@ -49,8 +47,7 @@ import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveO */ @Description(name = "date_format", value = "_FUNC_(date/timestamp/string, fmt) - converts a date/timestamp/string " + "to a value of string in the format specified by the date format fmt.", - extended = "Supported formats are DateTimeFormatter formats - " - + "https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html. 
" + extended = "Supported formats are defined by the underlying formatter implementation (hive.datetime.formatter). " + "Second argument fmt should be constant.\n" + "Example: > SELECT _FUNC_('2015-04-08', 'y');\n '2015'") public class GenericUDFDateFormat extends GenericUDF { @@ -59,7 +56,8 @@ public class GenericUDFDateFormat extends GenericUDF { private final Text output = new Text(); private transient ZoneId timeZone; - private transient DateTimeFormatter formatter; + private transient InstantFormatter formatter; + private transient String fmtStr; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -75,19 +73,15 @@ public class GenericUDFDateFormat extends GenericUDF { obtainTimestampConverter(arguments, 0, tsInputTypes, tsConverters); + HiveConf conf = SessionState.getSessionConf(); + if (formatter == null) { + formatter = InstantFormatter.ofConfiguration(conf); + } + if (timeZone == null) { + timeZone = conf.getLocalTimeZone(); + } if (arguments[1] instanceof ConstantObjectInspector) { - String fmtStr = getConstantStringValue(arguments, 1); - if (fmtStr != null) { - try { - if (timeZone == null) { - timeZone = SessionState.get() == null ? 
new HiveConf().getLocalTimeZone() : SessionState.get().getConf() - .getLocalTimeZone(); - } - formatter = DateTimeFormatter.ofPattern(fmtStr); - } catch (IllegalArgumentException e) { - // ignore - } - } + fmtStr = getConstantStringValue(arguments, 1); } else { throw new UDFArgumentTypeException(1, getFuncName() + " only takes constant as " + getArgOrder(1) + " argument"); } @@ -95,9 +89,19 @@ public class GenericUDFDateFormat extends GenericUDF { return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } + @Override + public void configure(final MapredContext context) { + super.configure(context); + if (context != null) { + formatter = InstantFormatter.ofConfiguration(context.getJobConf()); + String timeZoneStr = HiveConf.getVar(context.getJobConf(), HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE); + timeZone = TimestampTZUtil.parseTimeZone(timeZoneStr); + } + } + @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - if (formatter == null) { + if (fmtStr == null) { return null; } @@ -109,12 +113,13 @@ public class GenericUDFDateFormat extends GenericUDF { return null; } - Instant instant = Instant.ofEpochSecond(ts.toEpochSecond(), ts.getNanos()); - ZonedDateTime zonedDateTime = ZonedDateTime.ofInstant(instant, ZoneOffset.UTC); - String res = formatter.format(zonedDateTime.withZoneSameLocal(timeZone)); - - output.set(res); - return output; + try { + String res = formatter.format(TimestampTZUtil.convert(ts, timeZone).toInstant(), fmtStr); + output.set(res); + return output; + } catch (RuntimeException e) { + return null; + } } @Override diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java index 99ca59749c6..03747c98ace 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUnixTime.java @@ -31,6 +31,9 @@ 
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspecto import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Text; + +import java.time.Instant; + import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping.STRING_GROUP; /** @@ -47,7 +50,7 @@ public class GenericUDFFromUnixTime extends GenericUDF { private transient IntObjectInspector inputIntOI; private transient LongObjectInspector inputLongOI; private transient final Text result = new Text(); - private transient UnixTimeFormatter formatter; + private transient InstantFormatter formatter; private transient Converter[] converters = new Converter[2]; private transient PrimitiveObjectInspector.PrimitiveCategory[] inputTypes = new PrimitiveObjectInspector.PrimitiveCategory[2]; @@ -77,7 +80,7 @@ public class GenericUDFFromUnixTime extends GenericUDF { obtainStringConverter(arguments, 1, inputTypes, converters); } if (formatter == null) { - formatter = UnixTimeFormatter.ofConfiguration(SessionState.get() == null ? new HiveConf() : SessionState.getSessionConf()); + formatter = InstantFormatter.ofConfiguration(SessionState.get() == null ? 
new HiveConf() : SessionState.getSessionConf()); } return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } @@ -85,7 +88,7 @@ public class GenericUDFFromUnixTime extends GenericUDF { @Override public void configure(MapredContext context) { if (context != null) { - formatter = UnixTimeFormatter.ofConfiguration(context.getJobConf()); + formatter = InstantFormatter.ofConfiguration(context.getJobConf()); } } @@ -101,9 +104,9 @@ public class GenericUDFFromUnixTime extends GenericUDF { if (format == null) { return null; } - result.set(formatter.format(unixTime, format)); + result.set(formatter.format(Instant.ofEpochSecond(unixTime), format)); } else { - result.set(formatter.format(unixTime)); + result.set(formatter.format(Instant.ofEpochSecond(unixTime))); } return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java index 788a83a88b0..f32ddd0d570 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToUnixTimeStamp.java @@ -60,7 +60,7 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { private transient TimestampLocalTZObjectInspector inputTimestampLocalTzOI; private transient Converter inputTextConverter; private transient Converter patternConverter; - private transient UnixTimeFormatter formatter; + private transient InstantFormatter formatter; private transient ZoneId timeZone; @Override @@ -100,7 +100,7 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { PrimitiveObjectInspectorFactory.javaStringObjectInspector); } if (formatter == null) { - formatter = UnixTimeFormatter.ofConfiguration(conf); + formatter = InstantFormatter.ofConfiguration(conf); } break; case DATE: @@ -125,7 +125,7 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { @Override public void configure(MapredContext 
context) { if (context != null) { - formatter = UnixTimeFormatter.ofConfiguration(context.getJobConf()); + formatter = InstantFormatter.ofConfiguration(context.getJobConf()); String timeZoneStr = HiveConf.getVar(context.getJobConf(), HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE); timeZone = TimestampTZUtil.parseTimeZone(timeZoneStr); } @@ -150,9 +150,9 @@ public class GenericUDFToUnixTimeStamp extends GenericUDF { try { final long epochSeconds; if (patternConverter == null) { - epochSeconds = formatter.parse(textVal); + epochSeconds = formatter.parse(textVal).getEpochSecond(); } else { - epochSeconds = formatter.parse(textVal, (String) patternConverter.convert(arguments[1].get())); + epochSeconds = formatter.parse(textVal, (String) patternConverter.convert(arguments[1].get())).getEpochSecond(); } retValue.set(epochSeconds); return retValue; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantDateTimeFormatter.java similarity index 78% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantDateTimeFormatter.java index b45a595e2cc..67ca27e5773 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantDateTimeFormatter.java @@ -29,23 +29,23 @@ import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatterBuilder; import java.util.Objects; -final class UnixTimeDateTimeFormatter extends UnixTimeFormatterCache<DateTimeFormatter> { +final class InstantDateTimeFormatter extends InstantFormatterCache<DateTimeFormatter> { - UnixTimeDateTimeFormatter(final ZoneId zoneId) { + InstantDateTimeFormatter(final ZoneId zoneId) { super(zoneId, s -> new DateTimeFormatterBuilder().parseCaseInsensitive().appendPattern(s).toFormatter().withZone(zoneId)); } @Override - 
public long parse(String text) throws RuntimeException { + public Instant parse(String text) throws RuntimeException { Objects.requireNonNull(text); Timestamp timestamp = Timestamp.valueOf(text); TimestampTZ timestampTZ = TimestampTZUtil.convert(timestamp, zoneId); - return timestampTZ.getEpochSecond(); + return Instant.ofEpochSecond(timestampTZ.getEpochSecond(), timestampTZ.getNanos()); } @Override - public long parse(String text, String pattern) { + public Instant parse(String text, String pattern) { Objects.requireNonNull(text); Objects.requireNonNull(pattern); Timestamp timestamp; @@ -57,19 +57,17 @@ final class UnixTimeDateTimeFormatter extends UnixTimeFormatterCache<DateTimeFor LocalDate localDate = LocalDate.parse(text, formatter); timestamp = new Timestamp(localDate.atStartOfDay()); } - TimestampTZ timestampTZ = TimestampTZUtil.convert(timestamp, zoneId); - return timestampTZ.getEpochSecond(); + return TimestampTZUtil.convert(timestamp, zoneId).toInstant(); } @Override - public String format(final long epochSeconds) { - return format(epochSeconds, "uuuu-MM-dd HH:mm:ss"); + public String format(final Instant instant) { + return format(instant, "uuuu-MM-dd HH:mm:ss"); } @Override - public String format(final long epochSeconds, final String pattern) { + public String format(final Instant instant, final String pattern) { DateTimeFormatter formatter = getFormatter(pattern); - Instant instant = Instant.ofEpochSecond(epochSeconds); ZonedDateTime zonedDT = ZonedDateTime.ofInstant(instant, zoneId); return zonedDT.format(formatter); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatter.java similarity index 65% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatter.java index de4599006e2..382a10089dd 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatter.java @@ -20,13 +20,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.TimestampTZUtil; import org.apache.hadoop.hive.conf.HiveConf; +import java.time.Instant; import java.time.ZoneId; /** - * Formatter for parsing and printing unixtime objects (long numbers representing seconds since epoch). + * Formatter for parsing and printing {@link Instant} objects. * <p> * This interface provides the main entry point for print and parsing and provides factories for the - * available implementations of {@code UnixTimeFormatter}. + * available implementations of {@code InstantFormatter}. * </p> * <p> * The patterns that are supported and their behavior depend on the underlying implementation of the interface. @@ -35,7 +36,7 @@ import java.time.ZoneId; * Implementations of the interface are not meant to be thread safe. * </p> */ -public interface UnixTimeFormatter { +public interface InstantFormatter { /** * Types for the built-in formatter implementations. @@ -46,8 +47,8 @@ public interface UnixTimeFormatter { */ SIMPLE { @Override - UnixTimeFormatter newFormatter(ZoneId zone) { - return new UnixTimeSimpleDateFormatter(zone); + InstantFormatter newFormatter(ZoneId zone) { + return new InstantSimpleDateFormatter(zone); } }, /** @@ -55,8 +56,8 @@ public interface UnixTimeFormatter { */ DATETIME { @Override - UnixTimeFormatter newFormatter(ZoneId zone) { - return new UnixTimeDateTimeFormatter(zone); + InstantFormatter newFormatter(ZoneId zone) { + return new InstantDateTimeFormatter(zone); } }; /** @@ -64,7 +65,7 @@ public interface UnixTimeFormatter { * @param zone - the zone id * @return a new formatter with the specified zone id. 
*/ - abstract UnixTimeFormatter newFormatter(ZoneId zone); + abstract InstantFormatter newFormatter(ZoneId zone); } /** @@ -73,47 +74,47 @@ public interface UnixTimeFormatter { * @param conf the configuration to use, not null * @return the formatter based on the provided configuration, not null. */ - static UnixTimeFormatter ofConfiguration(Configuration conf) { + static InstantFormatter ofConfiguration(Configuration conf) { ZoneId zoneId = TimestampTZUtil.parseTimeZone(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE)); Type type = Type.valueOf(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_DATETIME_FORMATTER).toUpperCase()); return type.newFormatter(zoneId); } /** - * Parses the input text and converts it to seconds since epoch. + * Parses the input text and converts it to an instant. * @param text the text to parse, not null - * @return a long number representing the number of seconds since epoch. + * @return an Instant representing a specific point in time. * @throws RuntimeException if unable to parse the requested text using the default behavior. */ - long parse(String text) throws RuntimeException; + Instant parse(String text) throws RuntimeException; /** - * Parses the input text and converts it to seconds since epoch using the specified pattern. + * Parses the input text and converts it to an instant using the specified pattern. * @param text the text to parse, not null - * @param pattern the pattern to use to parse the text and resolve it to seconds since epoch - * @return a long number representing the number of seconds since epoch. + * @param pattern the pattern to use to parse the text and resolve it to an instant + * @return an Instant representing a specific point in time. * @throws RuntimeException if unable to parse the requested text using the specified pattern. 
*/ - long parse(String text, String pattern) throws RuntimeException; + Instant parse(String text, String pattern) throws RuntimeException; /** - * Formats the specified number of seconds since epoch using the formatters default pattern. + * Formats the specified instant using the formatters default pattern. * <p> - * This formats the unixtime to a String using the rules of the underlying formatter. + * This formats the instant to a String using the rules of the underlying formatter. * </p> - * @param epochSeconds the number of seconds to format + * @param instant the instant to format * @return the formatted string, not null */ - String format(long epochSeconds); + String format(Instant instant); /** - * Formats the specified number of seconds since epoch using the specified pattern. + * Formats the specified instant using the specified pattern. * <p> - * This formats the unixtime to a String using specified pattern and the rules of the underlying formatter. + * This formats the instant to a String using specified pattern and the rules of the underlying formatter. 
* </p> - * @param epochSeconds the number of seconds to format + * @param instant the instant to format * @param pattern the pattern to use for formatting * @return the formatted string, not null */ - String format(long epochSeconds, String pattern); + String format(Instant instant, String pattern); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatterCache.java similarity index 91% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatterCache.java index 7c8a6479230..7cb42fa2ab0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantFormatterCache.java @@ -27,14 +27,14 @@ import java.util.function.Function; * </p> * @param <T> the type of the underlying datetime formatter */ -abstract class UnixTimeFormatterCache<T> implements UnixTimeFormatter { +abstract class InstantFormatterCache<T> implements InstantFormatter { protected final ZoneId zoneId; protected final Function<String, T> loader; protected String lastPattern; protected T formatter; - protected UnixTimeFormatterCache(ZoneId zoneId, Function<String, T> loader) { + protected InstantFormatterCache(ZoneId zoneId, Function<String, T> loader) { this.zoneId = zoneId; this.loader = loader; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantSimpleDateFormatter.java similarity index 77% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantSimpleDateFormatter.java index faffa5819f3..ed66111f7bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/InstantSimpleDateFormatter.java @@ -18,16 +18,17 @@ package org.apache.hadoop.hive.ql.udf.generic; import java.text.ParsePosition; import java.text.SimpleDateFormat; +import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeParseException; import java.util.Date; import java.util.Objects; import java.util.TimeZone; -final class UnixTimeSimpleDateFormatter extends UnixTimeFormatterCache<SimpleDateFormat> { +final class InstantSimpleDateFormatter extends InstantFormatterCache<SimpleDateFormat> { private static final String DEFAULT = "yyyy-MM-dd HH:mm:ss"; - UnixTimeSimpleDateFormatter(final ZoneId zoneId) { + InstantSimpleDateFormatter(final ZoneId zoneId) { super(zoneId, s -> { SimpleDateFormat f = new SimpleDateFormat(s); f.setTimeZone(TimeZone.getTimeZone(zoneId)); @@ -36,12 +37,12 @@ final class UnixTimeSimpleDateFormatter extends UnixTimeFormatterCache<SimpleDat } @Override - public long parse(final String value) throws RuntimeException { + public Instant parse(final String value) throws RuntimeException { return parse(value, DEFAULT); } @Override - public long parse(String text, String pattern) { + public Instant parse(String text, String pattern) { Objects.requireNonNull(text); Objects.requireNonNull(pattern); final SimpleDateFormat formatter = getFormatter(pattern); @@ -51,19 +52,18 @@ final class UnixTimeSimpleDateFormatter extends UnixTimeFormatterCache<SimpleDat throw new DateTimeParseException(text + " cannot be parsed to date. 
Error at index " + pos.getErrorIndex(), text, pos.getErrorIndex()); } - return d.getTime() / 1000; + return Instant.ofEpochMilli(d.getTime()); } @Override - public String format(final long epochSeconds) { - return format(epochSeconds, DEFAULT); + public String format(final Instant instant) { + return format(instant, DEFAULT); } @Override - public String format(final long epochSeconds, final String pattern) { + public String format(final Instant instant, final String pattern) { SimpleDateFormat formatter = getFormatter(pattern); - // Convert epochSeconds to milliseconds - Date date = new Date(epochSeconds * 1000L); + Date date = new Date(instant.toEpochMilli()); return formatter.format(date); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.java new file mode 100644 index 00000000000..024bd6c2723 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredJavaObject; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; + +import com.opencsv.CSVParser; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; +import com.opencsv.exceptions.CsvException; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.Collection; +import java.util.Objects; + +import static org.junit.Assert.assertEquals; + +/** + * Tests for {@link GenericUDFDateFormat#evaluate(DeferredObject[])} with string value inputs. + */ +@RunWith(Parameterized.class) +public class TestGenericUDFDateFormatEvaluate { + private final GenericUDFDateFormat udf = new GenericUDFDateFormat(); + private final String value; + private final String pattern; + private final String zone; + private final String formatter; + private final Text expectedResult; + + public TestGenericUDFDateFormatEvaluate(String value, String pattern, String zone, String formatter, + String expectedResult) { + this.value = value; + this.pattern = pattern; + this.zone = zone; + this.formatter = formatter; + this.expectedResult = expectedResult.equals("null") ? 
null : new Text(expectedResult); + } + + @Parameterized.Parameters(name = "date_format('{0}','{1}'), zone={2}, formatter={3}") + public static Collection<String[]> readInputs() throws IOException, CsvException { + CSVParser parser = new CSVParserBuilder().withSeparator(';').withIgnoreQuotations(true).build(); + try (InputStream in = TestGenericUDFDateFormatEvaluate.class.getResourceAsStream( + "TestGenericUDFDateFormatEvaluate.csv")) { + Objects.requireNonNull(in); + try (CSVReader reader = new CSVReaderBuilder(new InputStreamReader(in)) + .withSkipLines(1) // Skip header + .withCSVParser(parser) + .build()) { + return reader.readAll(); + } + } + } + + @Test + public void testEvaluate() throws HiveException, InterruptedException { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.HIVE_DATETIME_FORMATTER, formatter); + conf.setVar(HiveConf.ConfVars.HIVE_LOCAL_TIME_ZONE, zone); + SessionState state = SessionState.start(conf); + ObjectInspector[] argInspectors = + new ObjectInspector[] { PrimitiveObjectInspectorFactory.writableStringObjectInspector, + PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, + new Text(pattern)) }; + udf.initialize(argInspectors); + Text result = (Text) udf.evaluate(new DeferredObject[] { new DeferredJavaObject(new Text(value)) }); + assertEquals(udfDisplayWithInputs(), expectedResult, result); + SessionState.endStart(state); + } + + private String udfDisplayWithInputs() { + return udf.getDisplayString(new String[] { value, pattern }) + " sessionZone=" + zone + ", formatter=" + formatter; + } +} diff --git a/ql/src/test/queries/clientpositive/udf_date_format.q b/ql/src/test/queries/clientpositive/udf_date_format.q index 583b1d98d97..9e9af09adfe 100644 --- a/ql/src/test/queries/clientpositive/udf_date_format.q +++ b/ql/src/test/queries/clientpositive/udf_date_format.q @@ -1,5 +1,6 @@ DESCRIBE FUNCTION date_format; DESC FUNCTION EXTENDED date_format; +set 
hive.datetime.formatter=DATETIME; set hive.local.time.zone=Africa/Johannesburg; diff --git a/ql/src/test/queries/clientpositive/udf_date_format_nofetchtask.q b/ql/src/test/queries/clientpositive/udf_date_format_nofetchtask.q new file mode 100644 index 00000000000..88487669dbf --- /dev/null +++ b/ql/src/test/queries/clientpositive/udf_date_format_nofetchtask.q @@ -0,0 +1,26 @@ +CREATE TABLE date_table (date_string string); +INSERT INTO date_table VALUES ('2023-09-13'), ('2023-09-14'), ('2023-09-15'); +-- Task conversion is disabled to ensure/test that session configurations take effect +-- when tasks are not run inside HS2 but in Tez, MapReduce, etc. +set hive.fetch.task.conversion=none; +set hive.datetime.formatter=SIMPLE; + +set hive.local.time.zone=Asia/Bangkok; +SELECT date_format(date_string, 'u z') FROM date_table; + +set hive.local.time.zone=Australia/Sydney; +SELECT date_format(date_string, 'u z') FROM date_table; + +set hive.local.time.zone=Africa/Johannesburg; +SELECT date_format(date_string, 'u z') FROM date_table; + +set hive.datetime.formatter=DATETIME; + +set hive.local.time.zone=Asia/Bangkok; +SELECT date_format(date_string, 'u z') FROM date_table; + +set hive.local.time.zone=Australia/Sydney; +SELECT date_format(date_string, 'u z') FROM date_table; + +set hive.local.time.zone=Africa/Johannesburg; +SELECT date_format(date_string, 'u z') FROM date_table; diff --git a/ql/src/test/queries/clientpositive/udf_date_format.q b/ql/src/test/queries/clientpositive/udf_date_format_simple.q similarity index 92% copy from ql/src/test/queries/clientpositive/udf_date_format.q copy to ql/src/test/queries/clientpositive/udf_date_format_simple.q index 583b1d98d97..1043cc14e2f 100644 --- a/ql/src/test/queries/clientpositive/udf_date_format.q +++ b/ql/src/test/queries/clientpositive/udf_date_format_simple.q @@ -1,5 +1,6 @@ DESCRIBE FUNCTION date_format; DESC FUNCTION EXTENDED date_format; +set hive.datetime.formatter=SIMPLE; set hive.local.time.zone=Africa/Johannesburg; 
@@ -75,6 +76,9 @@ select date_format("2015-04-08 10:30:45","yyyy-MM-dd HH:mm:ss.SSS z"); set hive.local.time.zone=Europe/Berlin; select date_format("2015-04-08 10:30:45","yyyy-MM-dd HH:mm:ss.SSS z"); +-- Dates prior to 1900 do not behave well when SIMPLE formatter is used (HIVE-25268) so the results below are not +-- really the expected ones. However, the results document the current behavior so it is useful to have them. + --julian date set hive.local.time.zone=UTC; select date_format("1001-01-05","dd---MM--yyyy"); diff --git a/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.csv b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.csv new file mode 100644 index 00000000000..401e1ef7dec --- /dev/null +++ b/ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFDateFormatEvaluate.csv @@ -0,0 +1,40 @@ +Input datetime;Input pattern;Local timezone;Formatter type;Expected output +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;1970-01-01 00:00:00 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;1970-01-01 00:00:00 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;DATETIME;1970-01-01 00:00:00 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Atlantic/Azores;SIMPLE;1970-01-01 00:00:00 +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;1970-01-01 00:00:00 GMT +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;1970-01-01 00:00:00 GMT +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss z;Atlantic/Azores;DATETIME;1970-01-01 00:00:00 AZOT +1970-01-01 00:00:00;yyyy-MM-dd HH:mm:ss z;Atlantic/Azores;SIMPLE;1970-01-01 00:00:00 AZOT +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;DATETIME;1970-01-01 00:00:00 GMT +1970-01-01 00:00:00 GMT-01:00;yyyy-MM-dd HH:mm:ss z;Etc/GMT;SIMPLE;1970-01-01 00:00:00 GMT +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;1800-01-01 00:00:00 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;1800-01-01 
00:00:00 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;1800-01-01 00:00:00 +1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;1799-12-31 23:36:32 +Jul 9 2023;MMM dd yyyy;Etc/GMT;DATETIME;null +Jul 9 2023;MMM dd yyyy;Etc/GMT;SIMPLE;null +2023-07-21;DD;Etc/GMT;DATETIME;null +2023-07-21;DD;Etc/GMT;SIMPLE;202 +2023-07-21;DDD;Etc/GMT;DATETIME;202 +2023-07-21;DDD;Etc/GMT;SIMPLE;202 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;2023-07-21 09:13:10 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;2023-07-21 09:13:10 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss.SSSSSSSSS;Etc/GMT;DATETIME;2023-07-21 09:13:10.123456789 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss.SSSSSSSSS;Etc/GMT;SIMPLE;2023-07-21 09:13:10.000000123 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;DATETIME;2023-07-21 09:13:10.123 +2023-07-21 09:13:10.123456789;yyyy-MM-dd HH:mm:ss.SSS;Etc/GMT;SIMPLE;2023-07-21 09:13:10.123 +2023-07-21 09:13:10.123456789;MMM dd yyyy;Etc/GMT;DATETIME;Jul 21 2023 +2023-07-21 09:13:10.123456789;MMM dd yyyy;Etc/GMT;SIMPLE;Jul 21 2023 +2023-07-21 09:13:10.123456789;u;Etc/GMT;DATETIME;2023 +2023-07-21 09:13:10.123456789;u;Etc/GMT;SIMPLE;5 +2023-07-21 09:13:10.123456789;e;Etc/GMT;DATETIME;6 +2023-07-21 09:13:10.123456789;EEEE;Etc/GMT;DATETIME;Friday +2023-07-21 09:13:10.123456789;EEEE;Etc/GMT;SIMPLE;Friday +2023-07-21 09:13:10.123456789;a;Etc/GMT;DATETIME;AM +2023-07-21 09:13:10.123456789;a;Etc/GMT;SIMPLE;AM +1000-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;DATETIME;1000-01-01 00:00:00 +1000-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Etc/GMT;SIMPLE;0999-12-27 00:00:00 +1000-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;1000-01-01 00:00:00 +1000-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;0999-12-26 23:36:32 diff --git a/ql/src/test/results/clientpositive/llap/udf_date_format.q.out b/ql/src/test/results/clientpositive/llap/udf_date_format.q.out index fbb6240ee5a..33db7e82a42 100644 
--- a/ql/src/test/results/clientpositive/llap/udf_date_format.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_date_format.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESC FUNCTION EXTENDED date_format POSTHOOK: type: DESCFUNCTION date_format(date/timestamp/string, fmt) - converts a date/timestamp/string to a value of string in the format specified by the date format fmt. -Supported formats are DateTimeFormatter formats - https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html. Second argument fmt should be constant. +Supported formats are defined by the underlying formatter implementation (hive.datetime.formatter). Second argument fmt should be constant. Example: > SELECT date_format('2015-04-08', 'y'); '2015' Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateFormat diff --git a/ql/src/test/results/clientpositive/llap/udf_date_format_nofetchtask.q.out b/ql/src/test/results/clientpositive/llap/udf_date_format_nofetchtask.q.out new file mode 100644 index 00000000000..3f2f0381c11 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/udf_date_format_nofetchtask.q.out @@ -0,0 +1,83 @@ +PREHOOK: query: CREATE TABLE date_table (date_string string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@date_table +POSTHOOK: query: CREATE TABLE date_table (date_string string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@date_table +PREHOOK: query: INSERT INTO date_table VALUES ('2023-09-13'), ('2023-09-14'), ('2023-09-15') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@date_table +POSTHOOK: query: INSERT INTO date_table VALUES ('2023-09-13'), ('2023-09-14'), ('2023-09-15') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@date_table +POSTHOOK: Lineage: date_table.date_string SCRIPT [] +PREHOOK: query: SELECT 
date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +3 ICT +4 ICT +5 ICT +PREHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +3 AEST +4 AEST +5 AEST +PREHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +3 SAST +4 SAST +5 SAST +PREHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +2023 ICT +2023 ICT +2023 ICT +PREHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +2023 AEST +2023 AEST +2023 AEST +PREHOOK: query: SELECT date_format(date_string, 'u z') FROM date_table +PREHOOK: type: QUERY +PREHOOK: Input: default@date_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
date_format(date_string, 'u z') FROM date_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_table +#### A masked pattern was here #### +2023 SAST +2023 SAST +2023 SAST diff --git a/ql/src/test/results/clientpositive/llap/udf_date_format.q.out b/ql/src/test/results/clientpositive/llap/udf_date_format_simple.q.out similarity index 96% copy from ql/src/test/results/clientpositive/llap/udf_date_format.q.out copy to ql/src/test/results/clientpositive/llap/udf_date_format_simple.q.out index fbb6240ee5a..74aa9890381 100644 --- a/ql/src/test/results/clientpositive/llap/udf_date_format.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_date_format_simple.q.out @@ -8,7 +8,7 @@ PREHOOK: type: DESCFUNCTION POSTHOOK: query: DESC FUNCTION EXTENDED date_format POSTHOOK: type: DESCFUNCTION date_format(date/timestamp/string, fmt) - converts a date/timestamp/string to a value of string in the format specified by the date format fmt. -Supported formats are DateTimeFormatter formats - https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html. Second argument fmt should be constant. +Supported formats are defined by the underlying formatter implementation (hive.datetime.formatter). Second argument fmt should be constant. 
Example: > SELECT date_format('2015-04-08', 'y'); '2015' Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFDateFormat @@ -92,7 +92,7 @@ date_format(cast(null as string), 'dd') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -10 30 45 09 PM 08 1 08 08 NULL +10 30 45 09 PM 08 123 08 08 NULL PREHOOK: query: select date_format(cast('2015-04-08' as date), 'EEEE'), date_format(cast('2015-04-08' as date), 'G'), @@ -195,7 +195,7 @@ POSTHOOK: query: select date_format("1001-01-05","dd---MM--yyyy") POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -05---01--1001 +30---12--1000 PREHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -204,7 +204,7 @@ POSTHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1400-01-14 01:01:10.123 ICT +1400-01-06 01:19:06.123 ICT PREHOOK: query: select date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -213,7 +213,7 @@ POSTHOOK: query: select date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1800-01-14 01:01:10.123 ICT +1800-01-14 01:19:06.123 ICT PREHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -222,7 +222,7 @@ POSTHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1400-01-14 01:01:10.123 CET +1400-01-06 01:07:42.123 CET PREHOOK: query: select 
date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -231,7 +231,7 @@ POSTHOOK: query: select date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1800-01-14 01:01:10.123 CET +1800-01-14 01:07:42.123 CET PREHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -240,7 +240,7 @@ POSTHOOK: query: select date_format('1400-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1400-01-14 01:01:10.123 SAST +1400-01-06 01:09:10.123 SAST PREHOOK: query: select date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm:ss.SSS z') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -249,4 +249,4 @@ POSTHOOK: query: select date_format('1800-01-14 01:01:10.123', 'yyyy-MM-dd HH:mm POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### -1800-01-14 01:01:10.123 SAST +1800-01-14 01:09:10.123 SAST