[hive] branch master updated: HIVE-27589: Iceberg: Branches of Merge/Update statements should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by Krisztian Kasa, Butao Zhang)

2023-08-21 Thread dkuzmenko
This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 3d3acc7a193 HIVE-27589: Iceberg: Branches of Merge/Update statements 
should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed 
by Krisztian Kasa, Butao Zhang)
3d3acc7a193 is described below

commit 3d3acc7a19399d749a39818573a76a0dbbaf2598
Author: Simhadri Govindappa 
AuthorDate: Mon Aug 21 17:56:03 2023 +0530

HIVE-27589: Iceberg: Branches of Merge/Update statements should be 
committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by 
Krisztian Kasa, Butao Zhang)

Closes #4575
---
 .../org/apache/iceberg/mr/InputFormatConfig.java   |   1 -
 .../mr/hive/HiveIcebergOutputCommitter.java| 202 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  15 +-
 .../hive/HiveIcebergStorageHandlerTestUtils.java   |   2 +-
 .../apache/iceberg/mr/hive/TestHiveIcebergV2.java  | 119 ++
 .../org/apache/iceberg/mr/hive/TestHiveShell.java  |  26 ++-
 .../queries/positive/iceberg_atomic_merge_update.q |  99 
 .../positive/iceberg_atomic_merge_update.q.out | 248 +
 ql/src/java/org/apache/hadoop/hive/ql/Context.java |   9 +-
 .../org/apache/hadoop/hive/ql/exec/MoveTask.java   |  10 +-
 .../hive/ql/metadata/HiveStorageHandler.java   |  11 +-
 11 files changed, 623 insertions(+), 119 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java
index eb212766c7c..831edd83d0c 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java
@@ -68,7 +68,6 @@ public class InputFormatConfig {
  public static final String COMMIT_FILE_THREAD_POOL_SIZE = "iceberg.mr.commit.file.thread.pool.size";
  public static final int COMMIT_FILE_THREAD_POOL_SIZE_DEFAULT = 10;
  public static final String WRITE_TARGET_FILE_SIZE = "iceberg.mr.write.target.file.size";
-  public static final String IS_OVERWRITE = "iceberg.mr.write.is.overwrite";
 
   public static final String CASE_SENSITIVE = "iceberg.mr.case.sensitive";
   public static final boolean CASE_SENSITIVE_DEFAULT = true;
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java
index 1ac8a3225ec..db62dcef1e9 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java
@@ -22,6 +22,7 @@ package org.apache.iceberg.mr.hive;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.AbstractMap.SimpleImmutableEntry;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -35,15 +36,14 @@ import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context.Operation;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils;
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.JobContext;
@@ -53,12 +53,13 @@ import org.apache.hadoop.mapred.TaskAttemptID;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.iceberg.AppendFiles;
-import org.apache.iceberg.ContentFile;
 import org.apache.iceberg.DataFile;
 import org.apache.iceberg.DeleteFile;
 import org.apache.iceberg.DeleteFiles;
 import org.apache.iceberg.ReplacePartitions;
 import org.apache.iceberg.RowDelta;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.SnapshotRef;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.exceptions.NotFoundException;
 import org.apache.iceberg.expressions.Expressions;
@@ -71,9 +72,9 @@ import org.apache.iceberg.mr.hive.writer.HiveIcebergWriter;
 import org.apache.iceberg.mr.hive.writer.WriterRegistry;
 import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
 import 

[hive] 02/02: HIVE-25576: Configurable datetime formatter for unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai Hemanth Gantasala)

2023-08-21 Thread zabetak
This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit ed51dfdbdfc109db53b3c631e9f1631e9bb65c34
Author: Stamatis Zampetakis 
AuthorDate: Thu Aug 17 22:02:59 2023 +0300

HIVE-25576: Configurable datetime formatter for unix_timestamp, 
from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai 
Hemanth Gantasala)

The two Java formatters present differences in their behavior, leading to
different query results. The supported patterns also differ between the
two formatters, which makes existing queries crash at runtime (after an
upgrade). Adapting to the new behavior of DateTimeFormatter is a
challenging and time-consuming task for end users, especially due to the
widespread use of the aforementioned unixtime functions.

Although DateTimeFormatter is a clear improvement over SimpleDateFormat
some users still want to retain the old behavior for compatibility
reasons thus introducing a property is necessary for facilitating
migration.

Overview of the change:

1. Add hive.datetime.formatter property to control formatter in
unix_timestamp and from_unixtime functions.
2. Add UnixTimeFormatter class hierarchy for encapsulating formatting
and parsing of unixtime based on the configuration.
3. Refactor unix_timestamp (+vectorized) and from_unixtime
implementations to use the new formatter classes.
4. Add parameterized unit tests for the affected UDF implementations.
The test cases are chosen in a way that highlights similarities and
differences between the two available formatters and documents the
current behavior. A few interesting test cases are discussed in more
detail below, but not all of them.
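
For illustration, toggling the new property in a session could look like
this (a sketch, not from the patch; it assumes the StringSet validator
shown further below accepts the two values DATETIME and SIMPLE used
throughout the tests, and a UTC session timezone; expected results taken
from the test rows discussed below):

    -- default: java.time.DateTimeFormatter semantics
    set hive.datetime.formatter=DATETIME;
    select unix_timestamp('Jul 9 2023', 'MMM dd yyyy');   -- NULL ('dd' needs two digits)

    -- opt back into the legacy SimpleDateFormat semantics
    set hive.datetime.formatter=SIMPLE;
    select unix_timestamp('Jul 9 2023', 'MMM dd yyyy');   -- 1688860800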

* Dates before 1800 in different timezones

1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;-5364683608
1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;-5364682200

The DATETIME and SIMPLE formatters use slightly different zone conversion
rules, so mapping 1800-01-01 00:00:00 Asia/Kolkata to seconds since the
epoch yields different results.
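
A minimal standalone sketch reproducing the two rows above with plain JDK
classes (an illustration, not code from the patch; the expected values in
the comments are taken from the rows):

    import java.text.SimpleDateFormat;
    import java.time.LocalDateTime;
    import java.time.ZoneId;
    import java.time.format.DateTimeFormatter;
    import java.util.TimeZone;

    public class ZoneConversionDemo {
      public static void main(String[] args) throws Exception {
        // SIMPLE path: SimpleDateFormat + java.util.TimeZone
        SimpleDateFormat simple = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        simple.setTimeZone(TimeZone.getTimeZone("Asia/Kolkata"));
        System.out.println(simple.parse("1800-01-01 00:00:00").getTime() / 1000); // -5364682200

        // DATETIME path: DateTimeFormatter + java.time zone rules
        DateTimeFormatter dtf = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
        LocalDateTime ldt = LocalDateTime.parse("1800-01-01 00:00:00", dtf);
        System.out.println(ldt.atZone(ZoneId.of("Asia/Kolkata")).toEpochSecond()); // -5364683608
      }
    }

The two stacks resolve the historical Asia/Kolkata offset differently,
which accounts for the 1408-second gap between the results.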

* Invalid pattern and AM/PM timestamps

2023-07-21 09:13PM;yyyy-MM-dd HH:mma;Etc/GMT;SIMPLE;1689930780

The SIMPLE formatter returns a wrong result when an invalid pattern is
used. The value 1689930780 actually corresponds to 2023-07-21 09:13AM
(not PM as it was supposed to be); it seems that 'HH' takes precedence
over 'a'. The combined use of 'H' and 'a' is problematic. When using
AM/PM markers, 'h' is the correct pattern letter.
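
The precedence quirk is easy to reproduce with SimpleDateFormat alone (a
sketch, not from the patch; the first expected value is the one from the
row above, the second is plain arithmetic):

    import java.text.SimpleDateFormat;
    import java.util.Locale;
    import java.util.TimeZone;

    public class AmPmPatternDemo {
      public static void main(String[] args) throws Exception {
        // Invalid combination: 'HH' (hour-of-day 0-23) together with 'a';
        // the parsed hour-of-day seems to win over the AM/PM marker.
        SimpleDateFormat bad = new SimpleDateFormat("yyyy-MM-dd HH:mma", Locale.ENGLISH);
        bad.setTimeZone(TimeZone.getTimeZone("Etc/GMT"));
        System.out.println(bad.parse("2023-07-21 09:13PM").getTime() / 1000); // 1689930780 (09:13 AM)

        // Correct combination: 'hh' (clock-hour 1-12) together with 'a'.
        SimpleDateFormat good = new SimpleDateFormat("yyyy-MM-dd hh:mma", Locale.ENGLISH);
        good.setTimeZone(TimeZone.getTimeZone("Etc/GMT"));
        System.out.println(good.parse("2023-07-21 09:13PM").getTime() / 1000); // 1689973980 (21:13)
      }
    }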

* Number of pattern letters

Jul 9 2023;MMM dd yyyy;Etc/GMT;DATETIME;null
Jul 9 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800

The SIMPLE formatter does not care how many times a pattern letter is
repeated when parsing, so the fact that the day appears as a single
digit is completely fine for it. The same does not hold for the
DATETIME formatter.
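
Again a standalone sketch of the two rows (not from the patch; the
expected epoch value is the one from the SIMPLE row above):

    import java.text.SimpleDateFormat;
    import java.time.LocalDate;
    import java.time.format.DateTimeFormatter;
    import java.time.format.DateTimeParseException;
    import java.util.Locale;
    import java.util.TimeZone;

    public class PatternWidthDemo {
      public static void main(String[] args) throws Exception {
        // SIMPLE: the repetition count of a numeric pattern letter is ignored
        // when parsing, so "dd" accepts the single-digit day "9".
        SimpleDateFormat simple = new SimpleDateFormat("MMM dd yyyy", Locale.ENGLISH);
        simple.setTimeZone(TimeZone.getTimeZone("Etc/GMT"));
        System.out.println(simple.parse("Jul 9 2023").getTime() / 1000); // 1688860800

        // DATETIME: "dd" is parsed as a fixed two-digit field, so the same
        // input fails; Hive surfaces this as the null in the row above.
        try {
          LocalDate.parse("Jul 9 2023", DateTimeFormatter.ofPattern("MMM dd yyyy", Locale.ENGLISH));
        } catch (DateTimeParseException e) {
          System.out.println("null (" + e.getMessage() + ")");
        }
      }
    }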

Closes #4615
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  13 ++-
 .../hadoop/hive/conf/TestHiveConfVarsValidate.java |   6 ++
 .../expressions/VectorUDFUnixTimeStampString.java  |  27 ++---
 .../ql/udf/generic/GenericUDFFromUnixTime.java |  35 ++
 .../ql/udf/generic/GenericUDFToUnixTimeStamp.java  |  76 -
 .../ql/udf/generic/UnixTimeDateTimeFormatter.java  |  76 +
 .../hive/ql/udf/generic/UnixTimeFormatter.java | 119 +
 .../ql/udf/generic/UnixTimeFormatterCache.java |  50 +
 .../udf/generic/UnixTimeSimpleDateFormatter.java   |  69 
 .../TestVectorUDFUnixTimeStampString.java  | 110 +++
 .../TestGenericUDFFromUnixTimeEvaluate.java| 105 ++
 ...ericUDFToUnixTimestampEvaluateStringString.java |  97 +
 .../expressions/TestVectorUnixTimeStampString.csv  |  36 +++
 .../generic/TestGenericUDFFromUnixTimeEvaluate.csv |  48 +
 ...nericUDFToUnixTimestampEvaluateStringString.csv |  54 ++
 15 files changed, 816 insertions(+), 105 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 108c545a384..14190915020 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3830,7 +3830,18 @@ public class HiveConf extends Configuration {
 
     HIVE_PRIVILEGE_SYNCHRONIZER_INTERVAL("hive.privilege.synchronizer.interval",
         "1800s", new TimeValidator(TimeUnit.SECONDS),
         "Interval to synchronize privileges from external authorizer periodically in HS2"),
-
+    HIVE_DATETIME_FORMATTER("hive.datetime.formatter", "DATETIME",
+        new StringSet("DATETIME", 

[hive] branch master updated (4d23badbd9c -> ed51dfdbdfc)

2023-08-21 Thread zabetak
This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


from 4d23badbd9c HIVE-27303: Set correct output name to ReduceSink when 
there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, 
Laszlo Vegh)
 new cd9b42d36d4 HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N 
reviewed by Akshat Mathur, Attila Turoczy, Stamatis Zampetakis)
 new ed51dfdbdfc HIVE-25576: Configurable datetime formatter for 
unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John 
Sherman, Sai Hemanth Gantasala)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  13 ++-
 .../hadoop/hive/conf/TestHiveConfVarsValidate.java |   6 ++
 .../hive/kafka/TransactionalKafkaWriterTest.java   |   1 -
 .../expressions/VectorUDFUnixTimeStampString.java  |  27 ++---
 .../ql/udf/generic/GenericUDFFromUnixTime.java |  35 ++
 .../ql/udf/generic/GenericUDFToUnixTimeStamp.java  |  76 -
 .../ql/udf/generic/UnixTimeDateTimeFormatter.java  |  76 +
 .../hive/ql/udf/generic/UnixTimeFormatter.java | 119 +
 .../ql/udf/generic/UnixTimeFormatterCache.java |  50 +
 .../udf/generic/UnixTimeSimpleDateFormatter.java   |  69 
 .../TestVectorUDFUnixTimeStampString.java  | 110 +++
 .../TestGenericUDFFromUnixTimeEvaluate.java| 105 ++
 ...ericUDFToUnixTimestampEvaluateStringString.java |  97 +
 .../expressions/TestVectorUnixTimeStampString.csv  |  36 +++
 .../generic/TestGenericUDFFromUnixTimeEvaluate.csv |  48 +
 ...nericUDFToUnixTimestampEvaluateStringString.csv |  54 ++
 16 files changed, 816 insertions(+), 106 deletions(-)
 create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java
 create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java
 create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java
 create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java
 create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFUnixTimeStampString.java
 create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.java
 create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.java
 create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUnixTimeStampString.csv
 create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.csv
 create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.csv



[hive] 01/02: HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N reviewed by Akshat Mathur, Attila Turoczy, Stamatis Zampetakis)

2023-08-21 Thread zabetak
This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit cd9b42d36d4f24bc6d37ccb8ffad2cadb745483f
Author: Kokila N 
AuthorDate: Fri Jul 21 18:20:04 2023 +0530

HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N reviewed by 
Akshat Mathur, Attila Turoczy, Stamatis Zampetakis)

Flaky check passed: http://ci.hive.apache.org/job/hive-flaky-check/728/

Closes #4512
---
 .../test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java  | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java b/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java
index 85bf5aac99a..07a3b5a37fe 100644
--- a/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java
+++ b/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java
@@ -58,7 +58,6 @@ import java.util.stream.IntStream;
 /**
  * Test Transactional Writer.
  */
-@org.junit.Ignore("HIVE-24771")
 public class TransactionalKafkaWriterTest {
 
   private static final String TOPIC = "TOPIC_TEST";



[hive] branch master updated: HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh)

2023-08-21 Thread veghlaci05
This is an automated email from the ASF dual-hosted git repository.

veghlaci05 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 4d23badbd9c HIVE-27303: Set correct output name to ReduceSink when 
there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, 
Laszlo Vegh)
4d23badbd9c is described below

commit 4d23badbd9c0a158f4ee4faee73ef5086dd2993b
Author: seonggon 
AuthorDate: Mon Aug 21 18:30:25 2023 +0900

HIVE-27303: Set correct output name to ReduceSink when there is a SMB join 
after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh)
---
 .../apache/hadoop/hive/ql/parse/GenTezWork.java|  18 +-
 .../queries/clientpositive/smb_join_after_union.q  |  56 
 .../clientpositive/llap/smb_join_after_union.q.out | 320 +
 3 files changed, 393 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
index 1385f6514ba..736e562c1af 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java
@@ -23,6 +23,7 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -423,7 +424,7 @@ public class GenTezWork implements SemanticNodeProcessor {
 rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName());
 
 // remember the output name of the reduce sink
-    rs.getConf().setOutputName(rWork.getName());
+    rs.getConf().setOutputName(getActualOutputWorkName(context, rWork));
 
     // For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any
 // ReduceSink parents that we missed.
@@ -514,4 +515,19 @@ public class GenTezWork implements SemanticNodeProcessor {
 unionWork.addUnionOperators(context.currentUnionOperators);
 context.workWithUnionOperators.add(work);
   }
+
+  /**
+   * If the given reduceWork is the merged work of a MergeJoinWork, return the name of that MergeJoinWork.
+   * Otherwise, return the name of the given reduceWork.
+   */
+  private String getActualOutputWorkName(GenTezProcContext context, ReduceWork reduceWork) {
+    return context.opMergeJoinWorkMap.values().stream()
+        .filter(mergeJoinWork -> mergeJoinWork.getBaseWorkList().contains(reduceWork))
+        .map(MergeJoinWork::getMainWork)
+        // getMainWork() == null means that we have not visited the leaf Operator of MergeJoinWork.
+        // In this case, GenTezWork will adjust the output name of merged works
+        // by calling MergeJoinWork.addMergedWork() with non-null argument for parameter work.
+        .filter(Objects::nonNull)
+        .findAny().orElse(reduceWork).getName();
+  }
 }
diff --git a/ql/src/test/queries/clientpositive/smb_join_after_union.q b/ql/src/test/queries/clientpositive/smb_join_after_union.q
new file mode 100644
index 000..62cb72296ad
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/smb_join_after_union.q
@@ -0,0 +1,56 @@
+-- SORT_QUERY_RESULTS
+
+create external table hive1_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive2_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive3_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+create external table hive4_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string);
+
+insert into table hive1_tbl_data select '1','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-1';
+insert into table hive1_tbl_data select '2','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-1';
+insert into table hive2_tbl_data select '1','john','doe','j...@hotmail.com','2014-01-01 12:01:02','1';
+insert into table hive2_tbl_data select '2','john','doe','j...@hotmail.com','2014-01-01 12:01:02','1';
+
+-- Reference, without SMB join
+set hive.auto.convert.sortmerge.join=false;
+
+select t.COLUMID from
+(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t
+left join
+(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1
+on t.COLUMID = t1.COLUMID where t1.COLUMID is null;
+
+-- HIVE-27303
+-- The following list is the expected OperatorGraph for the query.
+-- (Path1)   TS0-SEL1-UNION4
+-- (Path2)   

[hive] branch branch-3 updated: HIVE-27569: Backport of HIVE-22405: Add ColumnVector support for ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho Rodriguez)

2023-08-21 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new 1fcde9d02f5 HIVE-27569: Backport of HIVE-22405: Add ColumnVector 
support for ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho 
Rodriguez)
1fcde9d02f5 is described below

commit 1fcde9d02f5fa74e71a67f5e3fea2eba9c4ba64c
Author: Shefali Singh <31477542+shefali...@users.noreply.github.com>
AuthorDate: Mon Aug 21 11:57:31 2023 +0530

HIVE-27569: Backport of HIVE-22405: Add ColumnVector support for 
ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho Rodriguez)

Signed-off-by: Sankar Hariappan 
Closes (#4552)
---
 .../hive/ql/exec/vector/DateColumnVector.java  | 126 +++
 .../hive/ql/exec/vector/TimestampColumnVector.java |  83 +++-
 .../hive/ql/exec/vector/TestDateColumnVector.java  |  80 
 .../ql/exec/vector/TestTimestampColumnVector.java  | 140 +
 4 files changed, 407 insertions(+), 22 deletions(-)

diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
new file mode 100644
index 000..3dac667f5de
--- /dev/null
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import java.text.SimpleDateFormat;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * This class extends LongColumnVector in order to introduce some date-specific semantics. In
+ * DateColumnVector, the elements of vector[] represent the days since 1970-01-01.
+ */
+public class DateColumnVector extends LongColumnVector {
+  private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+  private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
+  private static final GregorianCalendar GREGORIAN_CALENDAR = new GregorianCalendar(UTC);
+
+  private static final SimpleDateFormat PROLEPTIC_GREGORIAN_DATE_FORMATTER =
+      new SimpleDateFormat("yyyy-MM-dd");
+  private static final SimpleDateFormat GREGORIAN_DATE_FORMATTER =
+      new SimpleDateFormat("yyyy-MM-dd");
+
+  /**
+  * -141427: hybrid: 1582-10-15 proleptic: 1582-10-15
+  * -141428: hybrid: 1582-10-04 proleptic: 1582-10-14
+  */
+  private static final int CUTOVER_DAY_EPOCH = -141427; // it's 1582-10-15 in both calendars
+
+  static {
+    PROLEPTIC_GREGORIAN_CALENDAR.setGregorianChange(new java.util.Date(Long.MIN_VALUE));
+
+    PROLEPTIC_GREGORIAN_DATE_FORMATTER.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR);
+    GREGORIAN_DATE_FORMATTER.setCalendar(GREGORIAN_CALENDAR);
+  }
+
+  private boolean usingProlepticCalendar = false;
+
+  public DateColumnVector() {
+this(VectorizedRowBatch.DEFAULT_SIZE);
+  }
+
+  /**
+   * Change the calendar to or from proleptic. If the new and old values of the flag are the same,
+   * nothing is done.
+   * @param useProleptic set the flag for the proleptic calendar
+   * @param updateData change the data to match the new value of the flag
+   */
+  public void changeCalendar(boolean useProleptic, boolean updateData) {
+if (useProleptic == usingProlepticCalendar) {
+  return;
+}
+usingProlepticCalendar = useProleptic;
+if (updateData) {
+  try {
+updateDataAccordingProlepticSetting();
+  } catch (Exception e) {
+throw new RuntimeException(e);
+  }
+}
+  }
+
+  private void updateDataAccordingProlepticSetting() throws Exception {
+for (int i = 0; i < vector.length; i++) {
+  if (vector[i] >= CUTOVER_DAY_EPOCH) { // no need for conversion
+continue;
+  }
+  long millis = TimeUnit.DAYS.toMillis(vector[i]);
+      String originalFormatted = usingProlepticCalendar ? GREGORIAN_DATE_FORMATTER.format(millis)
+          : PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis);
+
+  millis = 
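
A hedged usage sketch for the new class (illustrative only, not from the
patch; it assumes the public long[] vector field inherited from
LongColumnVector and the constructor shown above):

    import org.apache.hadoop.hive.ql.exec.vector.DateColumnVector;

    public class CalendarSwitchDemo {
      public static void main(String[] args) {
        DateColumnVector dcv = new DateColumnVector(); // VectorizedRowBatch.DEFAULT_SIZE entries
        dcv.vector[0] = -141428; // hybrid 1582-10-04 / proleptic 1582-10-14 (see CUTOVER_DAY_EPOCH)
        // Flip the flag and rewrite entries before CUTOVER_DAY_EPOCH so each
        // one keeps its rendered date string under the new calendar.
        dcv.changeCalendar(true, true);
      }
    }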

[hive] branch branch-3 updated: HIVE-27570: Backport of HIVE-21815: Stats in ORC file are parsed twice (Krisztian Kasa, reviewed by Gopal V)

2023-08-21 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new 44792af7d20 HIVE-27570: Backport of HIVE-21815: Stats in ORC file are 
parsed twice (Krisztian Kasa, reviewed by Gopal V)
44792af7d20 is described below

commit 44792af7d204d2da9573b43d46ee61bb4055d14a
Author: Shefali Singh <31477542+shefali...@users.noreply.github.com>
AuthorDate: Mon Aug 21 11:47:51 2023 +0530

HIVE-27570: Backport of HIVE-21815: Stats in ORC file are parsed twice 
(Krisztian Kasa, reviewed by Gopal V)

Signed-off-by: Sankar Hariappan 
Closes (#4553)
---
 ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 73c2dcce2c6..f2f93e07322 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -1653,9 +1653,12 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
         if (context.cacheStripeDetails) {
           context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail);
         }
+        stripes = orcReader.getStripes();
+        stripeStats = orcReader.getStripeStatistics();
+      } else {
+        stripes = orcTail.getStripes();
+        stripeStats = orcTail.getStripeStatistics();
       }
-      stripes = orcTail.getStripes();
-      stripeStats = orcTail.getStripeStatistics();
       fileTypes = orcTail.getTypes();
       TypeDescription fileSchema = OrcUtils.convertTypeFromProtobuf(fileTypes, 0);
   Reader.Options readerOptions = new Reader.Options(context.conf);



[hive] branch branch-3 updated: HIVE-27571: Backport of HIVE-18702: INSERT OVERWRITE TABLE doesn't clean the table directory before overwriting (Ivan Suller via Ashutosh Chauhan, Zoltan Haindrich)

2023-08-21 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new dd9a71423d1 HIVE-27571:  Backport of HIVE-18702: INSERT OVERWRITE 
TABLE doesn't clean the table directory before overwriting (Ivan Suller via 
Ashutosh Chauhan, Zoltan Haindrich)
dd9a71423d1 is described below

commit dd9a71423d1a4f748eedb6ca9f6972537e8ff796
Author: Shefali Singh <31477542+shefali...@users.noreply.github.com>
AuthorDate: Mon Aug 21 11:43:51 2023 +0530

HIVE-27571:  Backport of HIVE-18702: INSERT OVERWRITE TABLE doesn't clean 
the table directory before overwriting (Ivan Suller via Ashutosh Chauhan, 
Zoltan Haindrich)

Signed-off-by: Sankar Hariappan 
Closes (#4554)
---
 .../test/resources/testconfiguration.properties|   1 +
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |  18 +-
 .../test/queries/clientpositive/insert_overwrite.q |  77 +
 .../clientpositive/llap/insert_overwrite.q.out | 375 +
 4 files changed, 463 insertions(+), 8 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 88f74354c9e..4145b500574 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -543,6 +543,7 @@ minillaplocal.query.files=\
   insert_dir_distcp.q,\
   insert_into_default_keyword.q,\
   insert_into_with_schema.q,\
+  insert_overwrite.q,\
   insert_values_orig_table.q,\
   insert_values_orig_table_use_metadata.q,\
   insert1_overwrite_partitions.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index faeeb864a69..024fc64d924 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1861,7 +1861,7 @@ public class Hive {
       boolean needRecycle = !tbl.isTemporary()
           && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName()));
       replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal,
-          isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged);
+          isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged, isInsertOverwrite);
     } else {
       FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
       copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation,
@@ -2449,7 +2449,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
     boolean needRecycle = !tbl.isTemporary()
         && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName()));
     replaceFiles(tblPath, loadPath, destPath, tblPath, conf, isSrcLocal, isAutopurge,
-        newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged);
+        newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged, isInsertOverwrite);
   } else {
     try {
       FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
@@ -4197,9 +4197,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
* @param isManaged
*  If the table is managed.
*/
-  protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf,
+  private void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf,
       boolean isSrcLocal, boolean purge, List<Path> newFiles, PathFilter deletePathFilter,
-      boolean isNeedRecycle, boolean isManaged) throws HiveException {
+      boolean isNeedRecycle, boolean isManaged, boolean isInsertOverwrite) throws HiveException {
 try {
 
   FileSystem destFs = destf.getFileSystem(conf);
@@ -4212,15 +4212,17 @@ private void constructOneLBLocationMap(FileStatus fSta,
   } catch (IOException e) {
 throw new HiveException("Getting globStatus " + srcf.toString(), e);
   }
+
+  // the extra check is required to make ALTER TABLE ... CONCATENATE work
+  if (oldPath != null && (srcs != null || isInsertOverwrite)) {
+        deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isNeedRecycle);
+  }
+
   if (srcs == null) {
 LOG.info("No sources specified to move: " + srcf);
 return;
   }
 
-  if (oldPath != null) {
-        deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isNeedRecycle);
-  }
-
       // first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates
       // destf
   boolean destfExist = FileUtils.mkdir(destFs, destf, conf);
diff --git a/ql/src/test/queries/clientpositive/insert_overwrite.q 

[hive] branch branch-3 updated: HIVE-27572: Backport of HIVE-21296: Dropping varchar partition throw exception (Daniel Dai, reviewed by Anishek Agarwal)

2023-08-21 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new 26db0dcf940 HIVE-27572: Backport of HIVE-21296: Dropping varchar 
partition throw exception (Daniel Dai, reviewed by Anishek Agarwal)
26db0dcf940 is described below

commit 26db0dcf94090074a05dd3cb48ac2802b678ff62
Author: Shefali Singh <31477542+shefali...@users.noreply.github.com>
AuthorDate: Mon Aug 21 11:38:39 2023 +0530

HIVE-27572: Backport of HIVE-21296: Dropping varchar partition throw 
exception (Daniel Dai, reviewed by Anishek Agarwal)

Signed-off-by: Sankar Hariappan 
Closes (#4555)
---
 .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java |  3 ++-
 ql/src/test/queries/clientpositive/partition_varchar1.q|  2 ++
 ql/src/test/results/clientpositive/partition_varchar1.q.out| 10 ++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index a87fa27e904..ed84ff20641 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -96,7 +96,8 @@ public class ExprNodeDescUtils {
 
   private static boolean isDefaultPartition(ExprNodeDesc origin, String defaultPartitionName) {
     if (origin instanceof ExprNodeConstantDesc && ((ExprNodeConstantDesc)origin).getValue() != null &&
-        ((ExprNodeConstantDesc)origin).getValue().equals(defaultPartitionName)) {
+        ((ExprNodeConstantDesc)origin).getValue() instanceof String && ((ExprNodeConstantDesc)origin).getValue()
+        .equals(defaultPartitionName)) {
   return true;
 } else {
   return false;
diff --git a/ql/src/test/queries/clientpositive/partition_varchar1.q b/ql/src/test/queries/clientpositive/partition_varchar1.q
index dd991fd96f8..17e8357d386 100644
--- a/ql/src/test/queries/clientpositive/partition_varchar1.q
+++ b/ql/src/test/queries/clientpositive/partition_varchar1.q
@@ -41,4 +41,6 @@ select count(*) from partition_varchar_1 where dt <= '2000-01-01' and region = 1
 -- 20
 select count(*) from partition_varchar_1 where dt <> '2000-01-01' and region = 1;
 
+alter table partition_varchar_1 drop partition (dt = '2000-01-01');
+
 drop table partition_varchar_1;
diff --git a/ql/src/test/results/clientpositive/partition_varchar1.q.out b/ql/src/test/results/clientpositive/partition_varchar1.q.out
index 93c9adfcc29..b5d1890018a 100644
--- a/ql/src/test/results/clientpositive/partition_varchar1.q.out
+++ b/ql/src/test/results/clientpositive/partition_varchar1.q.out
@@ -190,6 +190,16 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@partition_varchar_1
  A masked pattern was here 
 20
+PREHOOK: query: alter table partition_varchar_1 drop partition (dt = '2000-01-01')
+PREHOOK: type: ALTERTABLE_DROPPARTS
+PREHOOK: Input: default@partition_varchar_1
+PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1
+PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2
+POSTHOOK: query: alter table partition_varchar_1 drop partition (dt = '2000-01-01')
+POSTHOOK: type: ALTERTABLE_DROPPARTS
+POSTHOOK: Input: default@partition_varchar_1
+POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1
+POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2
 PREHOOK: query: drop table partition_varchar_1
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@partition_varchar_1