[hive] branch master updated: HIVE-27675: Support keystore/truststore types for hive to zookeeper integration (Naveen Gangam) (#4691)

2023-09-15 Thread ngangam
This is an automated email from the ASF dual-hosted git repository.

ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 59570d6202e HIVE-27675: Support keystore/truststore types for hive to 
zookeeper integration (Naveen Gangam) (#4691)
59570d6202e is described below

commit 59570d6202e6c29dad0824af80e241339cf89f83
Author: Naveen Gangam 
AuthorDate: Fri Sep 15 10:38:20 2023 -0400

HIVE-27675: Support keystore/truststore types for hive to zookeeper 
integration (Naveen Gangam) (#4691)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java | 16 +--
 .../hcatalog/templeton/tool/ZooKeeperStorage.java  |  4 
 .../security/ZooKeeperTokenStoreTestBase.java  |  5 +
 .../org/apache/hive/jdbc/TestRestrictedList.java   |  2 ++
 .../InformationSchemaWithPrivilegeTestBase.java|  5 +
 .../java/org/apache/hive/jdbc/HiveConnection.java  |  4 +++-
 jdbc/src/java/org/apache/hive/jdbc/Utils.java  | 24 +-
 .../hive/jdbc/ZooKeeperHiveClientHelper.java   |  9 ++--
 .../hadoop/hive/registry/impl/ZkRegistryBase.java  |  2 ++
 .../hadoop/hive/common/SSLZookeeperFactory.java| 14 ++---
 .../hadoop/hive/common/ZooKeeperHiveHelper.java| 24 +-
 .../hadoop/hive/metastore/conf/MetastoreConf.java  | 14 -
 .../security/MetastoreDelegationTokenManager.java  |  4 
 .../metastore/security/ZooKeeperTokenStore.java| 10 +
 14 files changed, 126 insertions(+), 11 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 10b8a34b76e..0d7a8d072e5 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3012,6 +3012,10 @@ public class HiveConf extends Configuration {
 "Keystore password when using a client-side certificate with TLS 
connectivity to ZooKeeper." +
 "Overrides any explicit value set via the 
zookeeper.ssl.keyStore.password " +
  "system property (note the camelCase)."),
+HIVE_ZOOKEEPER_SSL_KEYSTORE_TYPE("hive.zookeeper.ssl.keystore.type", "",
+"Keystore type when using a client-side certificate with TLS 
connectivity to ZooKeeper." +
+"Overrides any explicit value set via the 
zookeeper.ssl.keyStore.type " +
+"system property (note the camelCase)."),
 
HIVE_ZOOKEEPER_SSL_TRUSTSTORE_LOCATION("hive.zookeeper.ssl.truststore.location",
 "",
 "Truststore location when using a client-side certificate with TLS 
connectivity to ZooKeeper. " +
 "Overrides any explicit value set via the 
zookeeper.ssl.trustStore.location" +
@@ -3020,6 +3024,10 @@ public class HiveConf extends Configuration {
 "Truststore password when using a client-side certificate with TLS 
connectivity to ZooKeeper." +
 "Overrides any explicit value set via the 
zookeeper.ssl.trustStore.password " +
  "system property (note the camelCase)."),
+HIVE_ZOOKEEPER_SSL_TRUSTSTORE_TYPE("hive.zookeeper.ssl.truststore.type", 
"",
+"Truststore type when using a client-side certificate with TLS 
connectivity to ZooKeeper." +
+"Overrides any explicit value set via the 
zookeeper.ssl.trustStore.type " +
+"system property (note the camelCase)."),
 HIVE_ZOOKEEPER_KILLQUERY_ENABLE("hive.zookeeper.killquery.enable", true,
 "Whether enabled kill query coordination with zookeeper, " +
 "when hive.server2.support.dynamic.service.discovery is enabled."),
@@ -5550,8 +5558,10 @@ public class HiveConf extends Configuration {
 "hive.driver.parallel.compilation.global.limit," +
 "hive.zookeeper.ssl.keystore.location," +
 "hive.zookeeper.ssl.keystore.password," +
+"hive.zookeeper.ssl.keystore.type," +
 "hive.zookeeper.ssl.truststore.location," +
-"hive.zookeeper.ssl.truststore.password",
+"hive.zookeeper.ssl.truststore.password," +
+"hive.zookeeper.ssl.truststore.type",
 "Comma separated list of configuration options which are immutable at 
runtime"),
 HIVE_CONF_HIDDEN_LIST("hive.conf.hidden.list",
 METASTOREPWD.varname + "," + HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname
@@ -6377,8 +6387,10 @@ public class HiveConf extends Configuration {
   .sslEnabled(getBoolVar(ConfVars.HIVE_ZOOKEEPER_SSL_ENABLE))
   .keyStoreLocation(getVar(ConfVars.HIVE_ZOOKEEPER_SSL_KEYSTORE_LOCATION))
   .keyStorePassword(keyStorePassword)
+  .keyStoreType(getVar(ConfVars.HIVE_ZOOKEEPER_SSL_KEYSTORE_TYPE))
   
.trustStoreLocation(getVar(ConfVars.HIVE_ZOOKEEPER_SSL_TRUSTSTORE_LOCATION))
-  .trustStorePassword(trustStorePassword).build();
+  

[hive] branch master updated: HIVE-27558: HBase table query does not push BETWEEN predicate to storage layer (Dayakar M, reviewed by Krisztian Kasa)

2023-09-15 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4c033b7ed16 HIVE-27558: HBase table query does not push BETWEEN 
predicate to storage layer (Dayakar M, reviewed by Krisztian Kasa)
4c033b7ed16 is described below

commit 4c033b7ed16bc544cf1e6cb262208f05c380730e
Author: Dayakar M <59791497+mdaya...@users.noreply.github.com>
AuthorDate: Fri Sep 15 19:37:59 2023 +0530

HIVE-27558: HBase table query does not push BETWEEN predicate to storage 
layer (Dayakar M, reviewed by Krisztian Kasa)

Co-authored-by: mdayakar 
---
 .../hadoop/hive/hbase/HBaseStorageHandler.java |  44 +
 .../test/queries/positive/hbase_between_pushdown.q |  28 +++
 .../results/positive/hbase_between_pushdown.q.out  | 217 +
 3 files changed, 289 insertions(+)

diff --git 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
index 24b2ec93933..4e1a5da725c 100644
--- 
a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
+++ 
b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseStorageHandler.java
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
@@ -54,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import 
org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
@@ -427,6 +429,10 @@ public class HBaseStorageHandler extends 
DefaultStorageHandler
   ExprNodeDesc predicate) {
 ColumnMapping keyMapping = 
hBaseSerDe.getHBaseSerdeParam().getKeyColumnMapping();
 ColumnMapping tsMapping = 
hBaseSerDe.getHBaseSerdeParam().getTimestampColumnMapping();
+
+// converts 'BETWEEN' predicate to '<= AND >=' predicate
+predicate = convertBetweenToLTOrEqualAndGTOrEqual(predicate);
+
 IndexPredicateAnalyzer analyzer = 
HiveHBaseTableInputFormat.newIndexPredicateAnalyzer(
 keyMapping.columnName, keyMapping.isComparable(),
 tsMapping == null ? null : tsMapping.columnName);
@@ -503,6 +509,19 @@ public class HBaseStorageHandler extends 
DefaultStorageHandler
 continue;
   }
 }
+
+// In HBase, Keys are sorted lexicographically so the byte 
representation of negative values comes after the positive values
+// So don't pushdown the predicate if any constant value is a negative 
number.
+// if first condition constant value is less than 0 then don't 
pushdown the predicate
+if (((Number) 
searchConditions.get(0).getConstantDesc().getValue()).longValue() < 0 && scSize 
== 2) {
+  residualPredicate = extractResidualCondition(analyzer, 
searchConditions, residualPredicate);
+  continue;
+}
+// if second condition constant value is less than 0 then don't 
pushdown the predicate
+if (scSize == 2 && ((Number) 
searchConditions.get(1).getConstantDesc().getValue()).longValue() < 0) {
+  residualPredicate = extractResidualCondition(analyzer, 
searchConditions, residualPredicate);
+  continue;
+}
   }
 
   // This one can be pushed
@@ -516,6 +535,31 @@ public class HBaseStorageHandler extends 
DefaultStorageHandler
 return decomposedPredicate;
   }
 
+  private static ExprNodeDesc 
convertBetweenToLTOrEqualAndGTOrEqual(ExprNodeDesc predicate) {
+if (predicate instanceof ExprNodeGenericFuncDesc && 
((ExprNodeGenericFuncDesc) predicate).getGenericUDF() instanceof 
GenericUDFBetween && "false".equalsIgnoreCase(
+predicate.getChildren().get(0).getExprString())) {
+  try {
+List betweenInputs = predicate.getChildren();
+List gtOrEqualInputs = 
Arrays.asList(betweenInputs.get(1), betweenInputs.get(2));
+List ltOrEqualInputs = 
Arrays.asList(betweenInputs.get(1), betweenInputs.get(3));
+
+ExprNodeGenericFuncDesc gtOrEqual =
+
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo(">=").getGenericUDF(),
 ">=",
+gtOrEqualInputs);
+ExprNodeGenericFuncDesc ltOrEqual =
+
ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("<=").getGenericUDF(),
 "<=",
+ltOrEqualInputs);
+List 

[hive] branch master updated: HIVE-27138: MapJoinOperator throws NPE when computing OuterJoin with filter expressions on small table (Seonggon Namgung, reviewed by Krisztian Kasa)

2023-09-15 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new b17ecf8e99c HIVE-27138: MapJoinOperator throws NPE when computing 
OuterJoin with filter expressions on small table (Seonggon Namgung, reviewed by 
Krisztian Kasa)
b17ecf8e99c is described below

commit b17ecf8e99c293ed250abc80b9a8ff95d37a61db
Author: seonggon 
AuthorDate: Fri Sep 15 18:24:25 2023 +0900

HIVE-27138: MapJoinOperator throws NPE when computing OuterJoin with filter 
expressions on small table (Seonggon Namgung, reviewed by Krisztian Kasa)
---
 .../persistence/MapJoinBytesTableContainer.java|   2 +-
 .../hive/ql/optimizer/ConvertJoinMapJoin.java  |   4 +-
 .../hive/ql/optimizer/FiltertagAppenderProc.java   | 229 +
 .../ql/optimizer/FullOuterMapJoinOptimization.java |  95 ---
 .../apache/hadoop/hive/ql/parse/TezCompiler.java   |  15 +
 .../ql/udf/generic/GenericUDFBaseArithmetic.java   |   2 +-
 .../hive/ql/udf/generic/GenericUDFBaseNumeric.java |   2 +-
 .../mapjoin_filter_on_outerjoin_tez.q  |  79 ++
 .../llap/fullouter_mapjoin_1_optimized.q.out   |   8 +-
 .../llap/mapjoin_filter_on_outerjoin_tez.q.out | 948 +
 .../llap/vector_fullouter_mapjoin_1_fast.q.out |  37 +-
 .../vector_fullouter_mapjoin_1_optimized.q.out |  37 +-
 ...or_fullouter_mapjoin_1_optimized_passthru.q.out |  37 +-
 .../llap/vector_outer_join_constants.q.out |   5 +-
 .../perf/tpcds30tb/tez/query97.q.out   |   1 +
 .../hive/serde2/lazybinary/LazyBinaryStruct.java   |   5 +-
 16 files changed, 1373 insertions(+), 133 deletions(-)

diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
index 464edf60f5d..79695975ef2 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java
@@ -342,7 +342,7 @@ public class MapJoinBytesTableContainer
 
 @Override
 public byte updateStateByte(Byte previousValue) {
-  if (!hasTag || filterGetter == null) {
+  if (filterGetter == null) {
 return (byte) 0xff;
   }
   byte aliasFilter = (previousValue == null) ? (byte)0xff : 
previousValue.byteValue();
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index d68a1e2d436..57299f95699 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -1542,9 +1542,7 @@ public class ConvertJoinMapJoin implements 
SemanticNodeProcessor {
 LOG.info("Selected dynamic partitioned hash join");
 MapJoinDesc mapJoinDesc = mapJoinOp.getConf();
 mapJoinDesc.setDynamicPartitionHashJoin(true);
-if (mapJoinConversion.getIsFullOuterJoin()) {
-  FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc);
-}
+
 // Set OpTraits for dynamically partitioned hash join:
 // bucketColNames: Re-use previous joinOp's bucketColNames. Parent 
operators should be
 //   reduce sink, which should have bucket columns based on the join 
keys.
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FiltertagAppenderProc.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FiltertagAppenderProc.java
new file mode 100644
index 000..59295ff6bfa
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FiltertagAppenderProc.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import 

[hive] branch master updated: HIVE-24606: Multi-stage materialized CTEs can lose intermediate data (okumin, reviewed by Krisztian Kasa)

2023-09-15 Thread krisztiankasa
This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 279c2508757 HIVE-24606: Multi-stage materialized CTEs can lose 
intermediate data (okumin, reviewed by Krisztian Kasa)
279c2508757 is described below

commit 279c25087575ad52645f031dfb5a1da7d712c18c
Author: okumin 
AuthorDate: Fri Sep 15 18:18:23 2023 +0900

HIVE-24606: Multi-stage materialized CTEs can lose intermediate data 
(okumin, reviewed by Krisztian Kasa)
---
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 102 +++--
 ql/src/test/queries/clientpositive/cte_mat_10.q|  42 +++
 ql/src/test/queries/clientpositive/cte_mat_8.q |  24 ++
 ql/src/test/queries/clientpositive/cte_mat_9.q |  66 
 .../results/clientpositive/llap/cte_mat_10.q.out   | 359 ++
 .../results/clientpositive/llap/cte_mat_8.q.out| 240 
 .../results/clientpositive/llap/cte_mat_9.q.out| 411 +
 7 files changed, 1210 insertions(+), 34 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 81f1e55f6f8..cdebdc5ff47 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -45,6 +45,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -1378,7 +1379,9 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   @Override
   public List> getAllRootTasks() {
 if (!rootTasksResolved) {
-  rootTasks = toRealRootTasks(rootClause.asExecutionOrder());
+  LinkedHashMap> realDependencies = 
listRealDependencies();
+  linkRealDependencies(realDependencies);
+  rootTasks = toRealRootTasks(realDependencies);
   rootTasksResolved = true;
 }
 return rootTasks;
@@ -1448,47 +1451,78 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
 }
   }
 
-  private List> toRealRootTasks(List execution) {
-List> cteRoots = new ArrayList<>();
-List> cteLeafs = new ArrayList<>();
-List> curTopRoots = null;
-List> curBottomLeafs = null;
-for (CTEClause current : execution) {
-  if (current.parents.isEmpty() && curTopRoots != null) {
-cteRoots.addAll(curTopRoots);
-cteLeafs.addAll(curBottomLeafs);
-curTopRoots = curBottomLeafs = null;
-  }
-  List> curTasks = current.getTasks();
-  if (curTasks == null) {
-continue;
-  }
-  if (curTopRoots == null) {
-curTopRoots = curTasks;
-  }
-  if (curBottomLeafs != null) {
-for (Task topLeafTask : curBottomLeafs) {
-  for (Task currentRootTask : curTasks) {
-topLeafTask.addDependentTask(currentRootTask);
-  }
+  private List> getRealTasks(CTEClause cte) {
+if (cte == rootClause) {
+  return rootTasks;
+} else {
+  return cte.getTasks();
+}
+  }
+
+  /**
+   * Links tasks based on dependencies among CTEs which have actual tasks.
+   * For example, when materialized CTE X depends on materialized CTE Y,
+   * the leaf tasks of Y must have the root tasks of X as its child tasks.
+   */
+  private void linkRealDependencies(LinkedHashMap> realDependencies) {
+LinkedHashMap>> dependentTasks = new 
LinkedHashMap<>();
+for (CTEClause child : realDependencies.keySet()) {
+  for (CTEClause parent : realDependencies.get(child)) {
+if (!dependentTasks.containsKey(parent)) {
+  dependentTasks.put(parent, new ArrayList<>());
 }
+dependentTasks.get(parent).addAll(getRealTasks(child));
   }
-  curBottomLeafs = Task.findLeafs(curTasks);
 }
-if (curTopRoots != null) {
-  cteRoots.addAll(curTopRoots);
-  cteLeafs.addAll(curBottomLeafs);
+// This operation must be performed only once per CTE since it creates new 
leaves
+for (CTEClause parent : dependentTasks.keySet()) {
+  List> sources = Task.findLeafs(getRealTasks(parent));
+  linkTasks(sources, dependentTasks.get(parent));
 }
+  }
 
-if (cteRoots.isEmpty()) {
-  return rootTasks;
+  private static void linkTasks(List> sources, Iterable> 
sinks) {
+for (Task source : sources) {
+  for (Task sink : sinks) {
+source.addDependentTask(sink);
+  }
 }
-for (Task cteLeafTask : cteLeafs) {
-  for (Task mainRootTask : rootTasks) {
-cteLeafTask.addDependentTask(mainRootTask);
+  }
+
+  // Returns tasks which have no dependencies and can start without waiting 
for any tasks
+  private List> toRealRootTasks(LinkedHashMap> realDependencies) {
+List> realRootTasks = 

[hive] branch master updated: HIVE-27673: Configurable datetime formatter for date_format (Stamatis Zampetakis reviewed by John Sherman, Aman Sinha, Ayush Saxena)

2023-09-15 Thread zabetak
This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 16a39b4fe77 HIVE-27673: Configurable datetime formatter for 
date_format (Stamatis Zampetakis reviewed by John Sherman, Aman Sinha, Ayush 
Saxena)
16a39b4fe77 is described below

commit 16a39b4fe77be3f204ddcffac607385f683ed129
Author: Stamatis Zampetakis 
AuthorDate: Fri Sep 8 12:20:37 2023 +0200

HIVE-27673: Configurable datetime formatter for date_format (Stamatis 
Zampetakis reviewed by John Sherman, Aman Sinha, Ayush Saxena)

SimpleDateFormat and DateTimeFormatter present differences in their 
behavior leading to different query results when date_format is used (after 
upgrade).

To avoid sudden changes in query results and allow users to gradually migrate 
their query workloads to the new patterns, the `hive.datetime.formatter` 
property can now be used to select which formatter is used by date_format.

Overview of the change:
1. Generalize the UnixTimeFormatter hierarchy so that it works with 
Instant, which can represent more than epochSeconds, to be used by 
GenericUDFDateFormat class.
2. Adapt slightly other classes using the UnixTimeFormatter to pass or 
retrieve the necessary values from/to Instants.
3. Refactor GenericUDFDateFormat to use the InstantFormatter so that its 
behavior becomes configurable via `hive.datetime.formatter` property.
4. Add a configure method in GenericUDFDateFormat to allow propagation of 
session-level configurations (formatter/zone) to the runtime (Tez, MR, etc.). 
At runtime there is no SessionState, so the only way to configure date_format is 
using the configure method and global configurations in hive-site.xml, etc.
5. Extend the use of `hive.datetime.formatter` to date_format and update 
the description in HiveConf, also mentioning the bugs that affect the SIMPLE 
formatter.
6. Add unit tests for date_format for both formatters covering:
* Reference dates (1970-01-01) and trivial patterns
* Invalid date inputs (Jul 9 2023) that cannot be parsed
* Timestamp inputs with timezone specification (1970-01-01 00:00:00 
GMT-01:00) that is ignored/dropped silently
* Current date (2023-07-21 09:13:10.123456789) with nano precision and 
pattern variations
* Patterns ('u','S') with different behavior between formatters
* Gregorian dates (1800-01-01 00:00:00) before 1900 and different timezones
* Julian dates (1000-01-01 00:00:00) and different timezones
7. Add end-to-end tests to ensure that the configuration property takes 
effect with (in HS2) and without (in Tez) task conversion.

Essentially the unit tests also demonstrate existing bugs when the SIMPLE 
formatter is used affecting Gregorian dates before 1900 and Julian dates.

Closes #4675
---
 .../hadoop/hive/common/type/TimestampTZ.java   |   3 +
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   7 +-
 .../expressions/VectorUDFUnixTimeStampString.java  |   8 +-
 .../hive/ql/udf/generic/GenericUDFDateFormat.java  |  57 +--
 .../ql/udf/generic/GenericUDFFromUnixTime.java |  13 ++-
 .../ql/udf/generic/GenericUDFToUnixTimeStamp.java  |  10 +-
 ...ormatter.java => InstantDateTimeFormatter.java} |  20 ++--
 ...nixTimeFormatter.java => InstantFormatter.java} |  49 +-
 ...matterCache.java => InstantFormatterCache.java} |   4 +-
 ...matter.java => InstantSimpleDateFormatter.java} |  20 ++--
 .../generic/TestGenericUDFDateFormatEvaluate.java  | 104 +
 .../test/queries/clientpositive/udf_date_format.q  |   1 +
 .../clientpositive/udf_date_format_nofetchtask.q   |  26 ++
 ...{udf_date_format.q => udf_date_format_simple.q} |   4 +
 .../generic/TestGenericUDFDateFormatEvaluate.csv   |  40 
 .../clientpositive/llap/udf_date_format.q.out  |   2 +-
 .../llap/udf_date_format_nofetchtask.q.out |  83 
 ...e_format.q.out => udf_date_format_simple.q.out} |  18 ++--
 18 files changed, 369 insertions(+), 100 deletions(-)

diff --git 
a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java 
b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java
index af181ba124e..3ff06e0eead 100644
--- a/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java
+++ b/common/src/java/org/apache/hadoop/hive/common/type/TimestampTZ.java
@@ -100,4 +100,7 @@ public class TimestampTZ implements Comparable 
{
 return zonedDateTime.toInstant().getNano();
   }
 
+  public Instant toInstant() {
+return zonedDateTime.toInstant();
+  }
 }
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a76b64f87f5..10b8a34b76e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++