[hive] branch master updated: HIVE-27589: Iceberg: Branches of Merge/Update statements should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by Krisztian Kasa, Butao Zhang)
This is an automated email from the ASF dual-hosted git repository. dkuzmenko pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3d3acc7a193 HIVE-27589: Iceberg: Branches of Merge/Update statements should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by Krisztian Kasa, Butao Zhang) 3d3acc7a193 is described below commit 3d3acc7a19399d749a39818573a76a0dbbaf2598 Author: Simhadri Govindappa AuthorDate: Mon Aug 21 17:56:03 2023 +0530 HIVE-27589: Iceberg: Branches of Merge/Update statements should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by Krisztian Kasa, Butao Zhang) Closes #4575 --- .../org/apache/iceberg/mr/InputFormatConfig.java | 1 - .../mr/hive/HiveIcebergOutputCommitter.java| 202 + .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 15 +- .../hive/HiveIcebergStorageHandlerTestUtils.java | 2 +- .../apache/iceberg/mr/hive/TestHiveIcebergV2.java | 119 ++ .../org/apache/iceberg/mr/hive/TestHiveShell.java | 26 ++- .../queries/positive/iceberg_atomic_merge_update.q | 99 .../positive/iceberg_atomic_merge_update.q.out | 248 + ql/src/java/org/apache/hadoop/hive/ql/Context.java | 9 +- .../org/apache/hadoop/hive/ql/exec/MoveTask.java | 10 +- .../hive/ql/metadata/HiveStorageHandler.java | 11 +- 11 files changed, 623 insertions(+), 119 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java index eb212766c7c..831edd83d0c 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/InputFormatConfig.java @@ -68,7 +68,6 @@ public class InputFormatConfig { public static final String COMMIT_FILE_THREAD_POOL_SIZE = "iceberg.mr.commit.file.thread.pool.size"; public static final 
int COMMIT_FILE_THREAD_POOL_SIZE_DEFAULT = 10; public static final String WRITE_TARGET_FILE_SIZE = "iceberg.mr.write.target.file.size"; - public static final String IS_OVERWRITE = "iceberg.mr.write.is.overwrite"; public static final String CASE_SENSITIVE = "iceberg.mr.case.sensitive"; public static final boolean CASE_SENSITIVE_DEFAULT = true; diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java index 1ac8a3225ec..db62dcef1e9 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergOutputCommitter.java @@ -22,6 +22,7 @@ package org.apache.iceberg.mr.hive; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.util.AbstractMap.SimpleImmutableEntry; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -35,15 +36,14 @@ import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context.Operation; import org.apache.hadoop.hive.ql.metadata.HiveUtils; -import org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils; import org.apache.hadoop.hive.ql.session.SessionStateUtil; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobContext; @@ -53,12 +53,13 @@ import org.apache.hadoop.mapred.TaskAttemptID; import 
org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.TaskType; import org.apache.iceberg.AppendFiles; -import org.apache.iceberg.ContentFile; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.DeleteFiles; import org.apache.iceberg.ReplacePartitions; import org.apache.iceberg.RowDelta; +import org.apache.iceberg.Snapshot; +import org.apache.iceberg.SnapshotRef; import org.apache.iceberg.Table; import org.apache.iceberg.exceptions.NotFoundException; import org.apache.iceberg.expressions.Expressions; @@ -71,9 +72,9 @@ import org.apache.iceberg.mr.hive.writer.HiveIcebergWriter; import org.apache.iceberg.mr.hive.writer.WriterRegistry; import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import
[hive] 02/02: HIVE-25576: Configurable datetime formatter for unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai Hemanth Gantasala)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit ed51dfdbdfc109db53b3c631e9f1631e9bb65c34 Author: Stamatis Zampetakis AuthorDate: Thu Aug 17 22:02:59 2023 +0300 HIVE-25576: Configurable datetime formatter for unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai Hemanth Gantasala) The two Java formatters present differences in their behavior leading to different query results. The supported patterns, between the two formatters, are also different something that makes existing queries crash at runtime (after upgrade). Adapting to the new behavior of DateTimeFormatter is a challenging and time-consuming task for end users especially due to the widespread use of the aforementioned unixtime functions. Although DateTimeFormatter is a clear improvement over SimpleDateFormat some users still want to retain the old behavior for compatibility reasons thus introducing a property is necessary for facilitating migration. Overview of the change: 1. Add hive.datetime.formatter property to control formatter in unix_timestamp and from_unixtime functions. 2. Add UnixTimeFormatter class hierarchy for encapsulating formatting and parsing of unixtime based on the configuration. 3. Refactor unix_timestamp (+vectorized) and from_unixtime implementations to use the new formatter classes. 4. Add parameterized unit tests for the affected UDF implementations. The test cases are chosen in a way to highlight similarities and differences between the two available formatters and document the current behavior. Few interesting test cases are discussed in more detail below but not all of them. 
* Dates before 1800 in different timezones 1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;DATETIME;-5364683608 1800-01-01 00:00:00;yyyy-MM-dd HH:mm:ss;Asia/Kolkata;SIMPLE;-5364682200 The DATETIME and SIMPLE formatter use slightly different zone conversion rules so mapping 1800-01-01 00:00:00 Asia/Kolkata to seconds since epoch presents differences. * Invalid pattern and AM/PM timestamps 2023-07-21 09:13PM;yyyy-MM-dd HH:mma;Etc/GMT;SIMPLE;1689930780 The SIMPLE formatter returns a wrong result when an invalid pattern is used. The value 1689930780 actually corresponds to 2023-07-21 09:13AM (not PM as it was supposed to be); it seems that 'HH' takes precedence over 'a'. The combined use of 'H' and 'a' is problematic. When using AM and PM the 'h' letter is the correct pattern letter. * Number of pattern letters Jul 9 2023;MMM dd yyyy;Etc/GMT;DATETIME;null Jul 9 2023;MMM dd yyyy;Etc/GMT;SIMPLE;1688860800 The SIMPLE formatter does not care how many times a pattern letter is used when parsing. For this reason although the day appears as a single digit for the SIMPLE formatter that is completely fine. The same does not hold for the DATETIME formatter. 
Closes #4615 --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 13 ++- .../hadoop/hive/conf/TestHiveConfVarsValidate.java | 6 ++ .../expressions/VectorUDFUnixTimeStampString.java | 27 ++--- .../ql/udf/generic/GenericUDFFromUnixTime.java | 35 ++ .../ql/udf/generic/GenericUDFToUnixTimeStamp.java | 76 - .../ql/udf/generic/UnixTimeDateTimeFormatter.java | 76 + .../hive/ql/udf/generic/UnixTimeFormatter.java | 119 + .../ql/udf/generic/UnixTimeFormatterCache.java | 50 + .../udf/generic/UnixTimeSimpleDateFormatter.java | 69 .../TestVectorUDFUnixTimeStampString.java | 110 +++ .../TestGenericUDFFromUnixTimeEvaluate.java| 105 ++ ...ericUDFToUnixTimestampEvaluateStringString.java | 97 + .../expressions/TestVectorUnixTimeStampString.csv | 36 +++ .../generic/TestGenericUDFFromUnixTimeEvaluate.csv | 48 + ...nericUDFToUnixTimestampEvaluateStringString.csv | 54 ++ 15 files changed, 816 insertions(+), 105 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 108c545a384..14190915020 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3830,7 +3830,18 @@ public class HiveConf extends Configuration { HIVE_PRIVILEGE_SYNCHRONIZER_INTERVAL("hive.privilege.synchronizer.interval", "1800s", new TimeValidator(TimeUnit.SECONDS), "Interval to synchronize privileges from external authorizer periodically in HS2"), - +HIVE_DATETIME_FORMATTER("hive.datetime.formatter", "DATETIME", +new StringSet("DATETIME",
[hive] branch master updated (4d23badbd9c -> ed51dfdbdfc)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git from 4d23badbd9c HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh) new cd9b42d36d4 HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N reviewed by Akshat Mathur, Attila Turoczy, Stamatis Zampetakis) new ed51dfdbdfc HIVE-25576: Configurable datetime formatter for unix_timestamp, from_unixtime (Stamatis Zampetakis reviewed by Aman Sinha, John Sherman, Sai Hemanth Gantasala) The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../java/org/apache/hadoop/hive/conf/HiveConf.java | 13 ++- .../hadoop/hive/conf/TestHiveConfVarsValidate.java | 6 ++ .../hive/kafka/TransactionalKafkaWriterTest.java | 1 - .../expressions/VectorUDFUnixTimeStampString.java | 27 ++--- .../ql/udf/generic/GenericUDFFromUnixTime.java | 35 ++ .../ql/udf/generic/GenericUDFToUnixTimeStamp.java | 76 - .../ql/udf/generic/UnixTimeDateTimeFormatter.java | 76 + .../hive/ql/udf/generic/UnixTimeFormatter.java | 119 + .../ql/udf/generic/UnixTimeFormatterCache.java | 50 + .../udf/generic/UnixTimeSimpleDateFormatter.java | 69 .../TestVectorUDFUnixTimeStampString.java | 110 +++ .../TestGenericUDFFromUnixTimeEvaluate.java| 105 ++ ...ericUDFToUnixTimestampEvaluateStringString.java | 97 + .../expressions/TestVectorUnixTimeStampString.csv | 36 +++ .../generic/TestGenericUDFFromUnixTimeEvaluate.csv | 48 + ...nericUDFToUnixTimestampEvaluateStringString.csv | 54 ++ 16 files changed, 816 insertions(+), 106 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeDateTimeFormatter.java create mode 100644 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatter.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeFormatterCache.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/UnixTimeSimpleDateFormatter.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUDFUnixTimeStampString.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.java create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorUnixTimeStampString.csv create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFFromUnixTimeEvaluate.csv create mode 100644 ql/src/test/resources/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFToUnixTimestampEvaluateStringString.csv
[hive] 01/02: HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N reviewed by Akshat Mathur, Attila Turoczy, Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git commit cd9b42d36d4f24bc6d37ccb8ffad2cadb745483f Author: Kokila N AuthorDate: Fri Jul 21 18:20:04 2023 +0530 HIVE-24771: Enable TransactionalKafkaWriterTest (Kokila N reviewed by Akshat Mathur, Attila Turoczy, Stamatis Zampetakis) Flaky check passed: http://ci.hive.apache.org/job/hive-flaky-check/728/ Closes #4512 --- .../test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java b/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java index 85bf5aac99a..07a3b5a37fe 100644 --- a/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java +++ b/kafka-handler/src/test/org/apache/hadoop/hive/kafka/TransactionalKafkaWriterTest.java @@ -58,7 +58,6 @@ import java.util.stream.IntStream; /** * Test Transactional Writer. */ -@org.junit.Ignore("HIVE-24771") public class TransactionalKafkaWriterTest { private static final String TOPIC = "TOPIC_TEST";
[hive] branch master updated: HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh)
This is an automated email from the ASF dual-hosted git repository. veghlaci05 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 4d23badbd9c HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh) 4d23badbd9c is described below commit 4d23badbd9c0a158f4ee4faee73ef5086dd2993b Author: seonggon AuthorDate: Mon Aug 21 18:30:25 2023 +0900 HIVE-27303: Set correct output name to ReduceSink when there is a SMB join after Union (Seonggon Namgung, reviewed by Denys Kuzmenko, Laszlo Vegh) --- .../apache/hadoop/hive/ql/parse/GenTezWork.java| 18 +- .../queries/clientpositive/smb_join_after_union.q | 56 .../clientpositive/llap/smb_join_after_union.q.out | 320 + 3 files changed, 393 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java index 1385f6514ba..736e562c1af 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezWork.java @@ -23,6 +23,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; import java.util.Stack; import org.apache.hadoop.hive.conf.HiveConf; @@ -423,7 +424,7 @@ public class GenTezWork implements SemanticNodeProcessor { rWork.getTagToInput().put(tag == -1 ? 0 : tag, work.getName()); // remember the output name of the reduce sink -rs.getConf().setOutputName(rWork.getName()); +rs.getConf().setOutputName(getActualOutputWorkName(context, rWork)); // For dynamic partitioned hash join, run the ReduceSinkMapJoinProc logic for any // ReduceSink parents that we missed. 
@@ -514,4 +515,19 @@ public class GenTezWork implements SemanticNodeProcessor { unionWork.addUnionOperators(context.currentUnionOperators); context.workWithUnionOperators.add(work); } + + /** + * If the given reduceWork is the merged work of a MergeJoinWork, return the name of that MergeJoinWork. + * Otherwise, return the name of the given reduceWork. + */ + private String getActualOutputWorkName(GenTezProcContext context, ReduceWork reduceWork) { +return context.opMergeJoinWorkMap.values().stream() +.filter(mergeJoinWork -> mergeJoinWork.getBaseWorkList().contains(reduceWork)) +.map(MergeJoinWork::getMainWork) +// getMainWork() == null means that we have not visited the leaf Operator of MergeJoinWork. +// In this case, GenTezWork will adjust the output name of merged works +// by calling MergeJoinWork.addMergedWork() with non-null argument for parameter work. +.filter(Objects::nonNull) +.findAny().orElse(reduceWork).getName(); + } } diff --git a/ql/src/test/queries/clientpositive/smb_join_after_union.q b/ql/src/test/queries/clientpositive/smb_join_after_union.q new file mode 100644 index 000..62cb72296ad --- /dev/null +++ b/ql/src/test/queries/clientpositive/smb_join_after_union.q @@ -0,0 +1,56 @@ +-- SORT_QUERY_RESULTS + +create external table hive1_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string); +create external table hive2_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string); +create external table hive3_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string); +create external table hive4_tbl_data (COLUMID string,COLUMN_FN string,COLUMN_LN string,EMAIL string,COL_UPDATED_DATE timestamp, PK_COLUM string); + +insert into table hive1_tbl_data select '1','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-1'; +insert into table hive1_tbl_data select 
'2','john','doe','j...@hotmail.com','2014-01-01 12:01:02','4000-1'; +insert into table hive2_tbl_data select '1','john','doe','j...@hotmail.com','2014-01-01 12:01:02','1'; +insert into table hive2_tbl_data select '2','john','doe','j...@hotmail.com','2014-01-01 12:01:02','1'; + +-- Reference, without SMB join +set hive.auto.convert.sortmerge.join=false; + +select t.COLUMID from +(select distinct t.COLUMID as COLUMID from (SELECT COLUMID FROM hive3_tbl_data UNION ALL SELECT COLUMID FROM hive1_tbl_data) t) t +left join +(select distinct t.COLUMID from (SELECT COLUMID FROM hive4_tbl_data UNION ALL SELECT COLUMID FROM hive2_tbl_data) t) t1 +on t.COLUMID = t1.COLUMID where t1.COLUMID is null; + +-- HIVE-27303 +-- The following list is the expected OperatorGraph for the query. +-- (Path1) TS0-SEL1-UNION4 +-- (Path2)
[hive] branch branch-3 updated: HIVE-27569: Backport of HIVE-22405: Add ColumnVector support for ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 1fcde9d02f5 HIVE-27569: Backport of HIVE-22405: Add ColumnVector support for ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho Rodriguez) 1fcde9d02f5 is described below commit 1fcde9d02f5fa74e71a67f5e3fea2eba9c4ba64c Author: Shefali Singh <31477542+shefali...@users.noreply.github.com> AuthorDate: Mon Aug 21 11:57:31 2023 +0530 HIVE-27569: Backport of HIVE-22405: Add ColumnVector support for ProlepticCalendar (László Bodor via Owen O'Malley, Jesus Camacho Rodriguez) Signed-off-by: Sankar Hariappan Closes (#4552) --- .../hive/ql/exec/vector/DateColumnVector.java | 126 +++ .../hive/ql/exec/vector/TimestampColumnVector.java | 83 +++- .../hive/ql/exec/vector/TestDateColumnVector.java | 80 .../ql/exec/vector/TestTimestampColumnVector.java | 140 + 4 files changed, 407 insertions(+), 22 deletions(-) diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java new file mode 100644 index 000..3dac667f5de --- /dev/null +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DateColumnVector.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.vector; + +import java.text.SimpleDateFormat; +import java.util.GregorianCalendar; +import java.util.TimeZone; +import java.util.concurrent.TimeUnit; + +/** + * This class extends LongColumnVector in order to introduce some date-specific semantics. In + * DateColumnVector, the elements of vector[] represent the days since 1970-01-01 + */ +public class DateColumnVector extends LongColumnVector { + private static final TimeZone UTC = TimeZone.getTimeZone("UTC"); + private static final GregorianCalendar PROLEPTIC_GREGORIAN_CALENDAR = new GregorianCalendar(UTC); + private static final GregorianCalendar GREGORIAN_CALENDAR = new GregorianCalendar(UTC); + + private static final SimpleDateFormat PROLEPTIC_GREGORIAN_DATE_FORMATTER = + new SimpleDateFormat("-MM-dd"); + private static final SimpleDateFormat GREGORIAN_DATE_FORMATTER = + new SimpleDateFormat("-MM-dd"); + + /** + * -141427: hybrid: 1582-10-15 proleptic: 1582-10-15 + * -141428: hybrid: 1582-10-04 proleptic: 1582-10-14 + */ + private static final int CUTOVER_DAY_EPOCH = -141427; // it's 1582-10-15 in both calendars + + static { +PROLEPTIC_GREGORIAN_CALENDAR.setGregorianChange(new java.util.Date(Long.MIN_VALUE)); + + PROLEPTIC_GREGORIAN_DATE_FORMATTER.setCalendar(PROLEPTIC_GREGORIAN_CALENDAR); +GREGORIAN_DATE_FORMATTER.setCalendar(GREGORIAN_CALENDAR); + } + + private boolean usingProlepticCalendar = false; + + public DateColumnVector() { +this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Change the calendar to or from proleptic. 
If the new and old values of the flag are the same, + * nothing is done. useProleptic - set the flag for the proleptic calendar updateData - change the + * data to match the new value of the flag. + */ + public void changeCalendar(boolean useProleptic, boolean updateData) { +if (useProleptic == usingProlepticCalendar) { + return; +} +usingProlepticCalendar = useProleptic; +if (updateData) { + try { +updateDataAccordingProlepticSetting(); + } catch (Exception e) { +throw new RuntimeException(e); + } +} + } + + private void updateDataAccordingProlepticSetting() throws Exception { +for (int i = 0; i < vector.length; i++) { + if (vector[i] >= CUTOVER_DAY_EPOCH) { // no need for conversion +continue; + } + long millis = TimeUnit.DAYS.toMillis(vector[i]); + String originalFormatted = usingProlepticCalendar ? GREGORIAN_DATE_FORMATTER.format(millis) +: PROLEPTIC_GREGORIAN_DATE_FORMATTER.format(millis); + + millis =
[hive] branch branch-3 updated: HIVE-27570: Backport of HIVE-21815: Stats in ORC file are parsed twice (Krisztian Kasa, reviewed by Gopal V)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 44792af7d20 HIVE-27570: Backport of HIVE-21815: Stats in ORC file are parsed twice (Krisztian Kasa, reviewed by Gopal V) 44792af7d20 is described below commit 44792af7d204d2da9573b43d46ee61bb4055d14a Author: Shefali Singh <31477542+shefali...@users.noreply.github.com> AuthorDate: Mon Aug 21 11:47:51 2023 +0530 HIVE-27570: Backport of HIVE-21815: Stats in ORC file are parsed twice (Krisztian Kasa, reviewed by Gopal V) Signed-off-by: Sankar Hariappan Closes (#4553) --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 73c2dcce2c6..f2f93e07322 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -1653,9 +1653,12 @@ public class OrcInputFormat implements InputFormat, if (context.cacheStripeDetails) { context.footerCache.put(new FooterCacheKey(fsFileId, file.getPath()), orcTail); } +stripes = orcReader.getStripes(); +stripeStats = orcReader.getStripeStatistics(); + } else { +stripes = orcTail.getStripes(); +stripeStats = orcTail.getStripeStatistics(); } - stripes = orcTail.getStripes(); - stripeStats = orcTail.getStripeStatistics(); fileTypes = orcTail.getTypes(); TypeDescription fileSchema = OrcUtils.convertTypeFromProtobuf(fileTypes, 0); Reader.Options readerOptions = new Reader.Options(context.conf);
[hive] branch branch-3 updated: HIVE-27571: Backport of HIVE-18702: INSERT OVERWRITE TABLE doesn't clean the table directory before overwriting (Ivan Suller via Ashutosh Chauhan, Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new dd9a71423d1 HIVE-27571: Backport of HIVE-18702: INSERT OVERWRITE TABLE doesn't clean the table directory before overwriting (Ivan Suller via Ashutosh Chauhan, Zoltan Haindrich) dd9a71423d1 is described below commit dd9a71423d1a4f748eedb6ca9f6972537e8ff796 Author: Shefali Singh <31477542+shefali...@users.noreply.github.com> AuthorDate: Mon Aug 21 11:43:51 2023 +0530 HIVE-27571: Backport of HIVE-18702: INSERT OVERWRITE TABLE doesn't clean the table directory before overwriting (Ivan Suller via Ashutosh Chauhan, Zoltan Haindrich) Signed-off-by: Sankar Hariappan Closes (#4554) --- .../test/resources/testconfiguration.properties| 1 + .../org/apache/hadoop/hive/ql/metadata/Hive.java | 18 +- .../test/queries/clientpositive/insert_overwrite.q | 77 + .../clientpositive/llap/insert_overwrite.q.out | 375 + 4 files changed, 463 insertions(+), 8 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 88f74354c9e..4145b500574 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -543,6 +543,7 @@ minillaplocal.query.files=\ insert_dir_distcp.q,\ insert_into_default_keyword.q,\ insert_into_with_schema.q,\ + insert_overwrite.q,\ insert_values_orig_table.q,\ insert_values_orig_table_use_metadata.q,\ insert1_overwrite_partitions.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index faeeb864a69..024fc64d924 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1861,7 +1861,7 @@ public class Hive { boolean 
needRecycle = !tbl.isTemporary() && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName())); replaceFiles(tbl.getPath(), loadPath, destPath, oldPartPath, getConf(), isSrcLocal, - isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged); + isAutoPurge, newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged, isInsertOverwrite); } else { FileSystem fs = tbl.getDataLocation().getFileSystem(conf); copyFiles(conf, loadPath, destPath, fs, isSrcLocal, isAcidIUDoperation, @@ -2449,7 +2449,7 @@ private void constructOneLBLocationMap(FileStatus fSta, boolean needRecycle = !tbl.isTemporary() && ReplChangeManager.isSourceOfReplication(Hive.get().getDatabase(tbl.getDbName())); replaceFiles(tblPath, loadPath, destPath, tblPath, conf, isSrcLocal, isAutopurge, -newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged); +newFiles, FileUtils.HIDDEN_FILES_PATH_FILTER, needRecycle, isManaged, isInsertOverwrite); } else { try { FileSystem fs = tbl.getDataLocation().getFileSystem(conf); @@ -4197,9 +4197,9 @@ private void constructOneLBLocationMap(FileStatus fSta, * @param isManaged * If the table is managed. */ - protected void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf, + private void replaceFiles(Path tablePath, Path srcf, Path destf, Path oldPath, HiveConf conf, boolean isSrcLocal, boolean purge, List newFiles, PathFilter deletePathFilter, - boolean isNeedRecycle, boolean isManaged) throws HiveException { + boolean isNeedRecycle, boolean isManaged, boolean isInsertOverwrite) throws HiveException { try { FileSystem destFs = destf.getFileSystem(conf); @@ -4212,15 +4212,17 @@ private void constructOneLBLocationMap(FileStatus fSta, } catch (IOException e) { throw new HiveException("Getting globStatus " + srcf.toString(), e); } + + // the extra check is required to make ALTER TABLE ... 
CONCATENATE work + if (oldPath != null && (srcs != null || isInsertOverwrite)) { +deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isNeedRecycle); + } + if (srcs == null) { LOG.info("No sources specified to move: " + srcf); return; } - if (oldPath != null) { -deleteOldPathForReplace(destf, oldPath, conf, purge, deletePathFilter, isNeedRecycle); - } - // first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates // destf boolean destfExist = FileUtils.mkdir(destFs, destf, conf); diff --git a/ql/src/test/queries/clientpositive/insert_overwrite.q
[hive] branch branch-3 updated: HIVE-27572: Backport of HIVE-21296: Dropping varchar partition throw exception (Daniel Dai, reviewed by Anishek Agarwal)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 26db0dcf940 HIVE-27572: Backport of HIVE-21296: Dropping varchar partition throw exception (Daniel Dai, reviewed by Anishek Agarwal) 26db0dcf940 is described below commit 26db0dcf94090074a05dd3cb48ac2802b678ff62 Author: Shefali Singh <31477542+shefali...@users.noreply.github.com> AuthorDate: Mon Aug 21 11:38:39 2023 +0530 HIVE-27572: Backport of HIVE-21296: Dropping varchar partition throw exception (Daniel Dai, reviewed by Anishek Agarwal) Signed-off-by: Sankar Hariappan Closes (#4555) --- .../java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java | 3 ++- ql/src/test/queries/clientpositive/partition_varchar1.q| 2 ++ ql/src/test/results/clientpositive/partition_varchar1.q.out| 10 ++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index a87fa27e904..ed84ff20641 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -96,7 +96,8 @@ public class ExprNodeDescUtils { private static boolean isDefaultPartition(ExprNodeDesc origin, String defaultPartitionName) { if (origin instanceof ExprNodeConstantDesc && ((ExprNodeConstantDesc)origin).getValue() != null && - ((ExprNodeConstantDesc)origin).getValue().equals(defaultPartitionName)) { +((ExprNodeConstantDesc)origin).getValue() instanceof String && ((ExprNodeConstantDesc)origin).getValue() +.equals(defaultPartitionName)) { return true; } else { return false; diff --git a/ql/src/test/queries/clientpositive/partition_varchar1.q b/ql/src/test/queries/clientpositive/partition_varchar1.q index dd991fd96f8..17e8357d386 100644 --- 
a/ql/src/test/queries/clientpositive/partition_varchar1.q +++ b/ql/src/test/queries/clientpositive/partition_varchar1.q @@ -41,4 +41,6 @@ select count(*) from partition_varchar_1 where dt <= '2000-01-01' and region = 1 -- 20 select count(*) from partition_varchar_1 where dt <> '2000-01-01' and region = 1; +alter table partition_varchar_1 drop partition (dt = '2000-01-01'); + drop table partition_varchar_1; diff --git a/ql/src/test/results/clientpositive/partition_varchar1.q.out b/ql/src/test/results/clientpositive/partition_varchar1.q.out index 93c9adfcc29..b5d1890018a 100644 --- a/ql/src/test/results/clientpositive/partition_varchar1.q.out +++ b/ql/src/test/results/clientpositive/partition_varchar1.q.out @@ -190,6 +190,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@partition_varchar_1 A masked pattern was here 20 +PREHOOK: query: alter table partition_varchar_1 drop partition (dt = '2000-01-01') +PREHOOK: type: ALTERTABLE_DROPPARTS +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1 +PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2 +POSTHOOK: query: alter table partition_varchar_1 drop partition (dt = '2000-01-01') +POSTHOOK: type: ALTERTABLE_DROPPARTS +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2 PREHOOK: query: drop table partition_varchar_1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@partition_varchar_1