[hive] branch master updated (9ec42929cc5 -> b237b30cd77)
This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git from 9ec42929cc5 HIVE-27585: Upgrade kryo serialization lib to latest version (#4570). (Suprith Chandrashekharachar, reviewed by Ayush Saxena, Attila Turoczy, Simhadri Govindappa) add b237b30cd77 HIVE-27631: Fix CCE when set fs.hdfs.impl other than DistributedFileSystem (#4613). (Baolong Mao, reviewed by Ayush Saxena) No new revisions were added by this update. Summary of changes: .../0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
[hive] branch master updated (3d3acc7a193 -> 9ec42929cc5)
This is an automated email from the ASF dual-hosted git repository. ayushsaxena pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git from 3d3acc7a193 HIVE-27589: Iceberg: Branches of Merge/Update statements should be committed atomically (Simhadri Govindappa, Denys Kuzmenko, reviewed by Krisztian Kasa, Butao Zhang) add 9ec42929cc5 HIVE-27585: Upgrade kryo serialization lib to latest version (#4570). (Suprith Chandrashekharachar, reviewed by Ayush Saxena, Attila Turoczy, Simhadri Govindappa) No new revisions were added by this update. Summary of changes: iceberg/pom.xml | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
[hive] branch branch-3 updated: HIVE-27618: Backport of HIVE-25446: Wrong exception thrown if capacity<=0
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 4e14f580c06 HIVE-27618: Backport of HIVE-25446: Wrong execption thrown if capacity<=0 4e14f580c06 is described below commit 4e14f580c06a7911bfb0847c11ee234404fb637b Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Tue Aug 22 13:10:55 2023 +0530 HIVE-27618: Backport of HIVE-25446: Wrong execption thrown if capacity<=0 Signed-off-by: Sankar Hariappan Closes (#4598) --- .../mapjoin/fast/VectorMapJoinFastHashTable.java | 20 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1ba52..572c686c497 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -56,29 +56,33 @@ public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTab } private static void validateCapacity(long capacity) { -if (Long.bitCount(capacity) != 1) { - throw new AssertionError("Capacity must be a power of two"); -} if (capacity <= 0) { throw new AssertionError("Invalid capacity " + capacity); } +if (Long.bitCount(capacity) != 1) { + throw new AssertionError("Capacity must be a power of two" + capacity); +} } private static int nextHighestPowerOfTwo(int v) { -return Integer.highestOneBit(v) << 1; +int value = Integer.highestOneBit(v); +if (Integer.highestOneBit(v) == HIGHEST_INT_POWER_OF_2) { + LOG.warn("Reached highest 2 power: {}", HIGHEST_INT_POWER_OF_2); + return value; +} +return value << 1; } public VectorMapJoinFastHashTable( int 
initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { -initialCapacity = (Long.bitCount(initialCapacity) == 1) +this.logicalHashBucketCount = (Long.bitCount(initialCapacity) == 1) ? initialCapacity : nextHighestPowerOfTwo(initialCapacity); +LOG.info("Initial Capacity {} Recomputed Capacity {}", initialCapacity, logicalHashBucketCount); -validateCapacity(initialCapacity); +validateCapacity(logicalHashBucketCount); this.estimatedKeyCount = estimatedKeyCount; - -logicalHashBucketCount = initialCapacity; logicalHashBucketMask = logicalHashBucketCount - 1; resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
[hive] branch branch-3 updated: HIVE-27615: Backport of HIVE-21280: Null pointer exception on running compaction against a MM table (Aditya Shah via Ashutosh Chauhan)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new ab494206e9b HIVE-27615: Backport of HIVE-21280: Null pointer exception on running compaction against a MM table (Aditya Shah via Ashutosh Chauhan) ab494206e9b is described below commit ab494206e9b69e3f3883b64cb42d181b091273c6 Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Tue Aug 22 13:03:13 2023 +0530 HIVE-27615: Backport of HIVE-21280: Null pointer exception on running compaction against a MM table (Aditya Shah via Ashutosh Chauhan) Signed-off-by: Sankar Hariappan Closes (#4595) --- ql/src/java/org/apache/hadoop/hive/ql/DriverUtils.java | 2 +- ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverUtils.java index 8228109751b..32b447c4f44 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/DriverUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverUtils.java @@ -33,7 +33,7 @@ public class DriverUtils { SessionState.setCurrentSessionState(sessionState); boolean isOk = false; try { - QueryState qs = new QueryState.Builder().withHiveConf(conf).nonIsolated().build(); + QueryState qs = new QueryState.Builder().withHiveConf(conf).withGenerateNewQueryId(true).nonIsolated().build(); Driver driver = new Driver(qs, user, null, null); driver.setCompactionWriteIds(writeIds); try { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 474f6c53426..d7e661bcd26 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java 
@@ -354,7 +354,7 @@ public class CompactorMR { conf.set(ConfVars.HIVE_QUOTEDID_SUPPORT.varname, "column"); String user = UserGroupInformation.getCurrentUser().getShortUserName(); - SessionState sessionState = DriverUtils.setUpSessionState(conf, user, false); + SessionState sessionState = DriverUtils.setUpSessionState(conf, user, true); // Note: we could skip creating the table and just add table type stuff directly to the // "insert overwrite directory" command if there were no bucketing or list bucketing.
[hive] branch branch-3 updated: HIVE-27552: Backport of HIVE-22360, HIVE-20619 to branch-3 (#4535)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new a3070e0dbfe HIVE-27552: Backport of HIVE-22360, HIVE-20619 to branch-3 (#4535) a3070e0dbfe is described below commit a3070e0dbfeb5de3620b5c953461f25cce6038fe Author: Aman Raj <104416558+amanraj2...@users.noreply.github.com> AuthorDate: Tue Aug 22 13:00:13 2023 +0530 HIVE-27552: Backport of HIVE-22360, HIVE-20619 to branch-3 (#4535) * HIVE-22360: MultiDelimitSerDe returns wrong results in last column when the loaded file has more columns than those in table schema (Shubham Chaurasia, reviewed by Sankar Hariappan) * HIVE-20619: Include MultiDelimitSerDe in HiveServer2 By Default (Alice Fan, reviewed by Naveen Gangam) Signed-off-by: Sankar Hariappan Closes (#4535) --- data/files/t11_csv_serde.csv | 10 + data/files/t1_multi_delimit.csv| 10 + data/files/t2_multi_delimit.csv| 4 + data/files/t3_multi_delimit.csv| 10 + .../queries/clientpositive/serde_multi_delimit.q | 65 ++ .../clientpositive/serde_multi_delimit.q.out | 232 + .../hadoop/hive}/serde2/MultiDelimitSerDe.java | 13 +- .../apache/hadoop/hive/serde2/lazy/LazyStruct.java | 56 ++--- 8 files changed, 362 insertions(+), 38 deletions(-) diff --git a/data/files/t11_csv_serde.csv b/data/files/t11_csv_serde.csv new file mode 100644 index 000..6e7060919ee --- /dev/null +++ b/data/files/t11_csv_serde.csv @@ -0,0 +1,10 @@ +1,1,,0,0 +2,1,,0,1 +3,1,,0,0 +4,1,,0,1 +5,5 + + +8,8,,8,8,8 +9,9,,9,9,9,9,,9,9,9 +10101010 \ No newline at end of file diff --git a/data/files/t1_multi_delimit.csv b/data/files/t1_multi_delimit.csv new file mode 100644 index 000..6c4e729f428 --- /dev/null +++ b/data/files/t1_multi_delimit.csv @@ -0,0 +1,10 @@ +1^,1^,^,0^,0 +2^,1^,^,0^,1 +3^,1^,^,0^,0 +4^,1^,^,0^,1 +5^,5 + + +8^,8^,^,8^,8^,8 +9^,9^,^,9^,9^,9^,9^,^,9^,9^,9 +10101010 \ No newline at end of file diff 
--git a/data/files/t2_multi_delimit.csv b/data/files/t2_multi_delimit.csv new file mode 100644 index 000..0dd42e1dfb6 --- /dev/null +++ b/data/files/t2_multi_delimit.csv @@ -0,0 +1,4 @@ +1^,1^,^,0^,0^,0 +2^,1^,^,0^,1^,0 +3^,1^,^,0^,0^,0 +4^,1^,^,0^,1^,0 diff --git a/data/files/t3_multi_delimit.csv b/data/files/t3_multi_delimit.csv new file mode 100644 index 000..8c49f6f3837 --- /dev/null +++ b/data/files/t3_multi_delimit.csv @@ -0,0 +1,10 @@ +1^1^^0^0 +2^1^^0^1 +3^1^^0^0 +4^1^^0^1 +5^5 + + +8^8^^8^8^8 +9^9^^9^9^9 +10101010 \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/serde_multi_delimit.q b/ql/src/test/queries/clientpositive/serde_multi_delimit.q new file mode 100644 index 000..0d851752867 --- /dev/null +++ b/ql/src/test/queries/clientpositive/serde_multi_delimit.q @@ -0,0 +1,65 @@ +-- in this table, rows of different lengths(different number of columns) are loaded +CREATE TABLE t1_multi_delimit(colA int, + colB tinyint, + colC timestamp, + colD smallint, + colE smallint) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe' +WITH SERDEPROPERTIES ("field.delim"="^,")STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH "../../data/files/t1_multi_delimit.csv" INTO TABLE t1_multi_delimit; + +SELECT * FROM t1_multi_delimit; + +-- in this table, rows of different lengths(different number of columns) and it uses csv serde +CREATE TABLE t11_csv_serde(colA int, + colB tinyint, + colC timestamp, + colD smallint, + colE smallint) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' +WITH SERDEPROPERTIES ("separatorChar" = ",")STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH "../../data/files/t11_csv_serde.csv" INTO TABLE t11_csv_serde; + +SELECT * FROM t11_csv_serde; + +-- there should not be any difference between MultiDelimitSerDe table and OpenCSVSerde table results + +SELECT EXISTS ( +SELECT colA, colB, colC, colD, colE FROM t1_multi_delimit +MINUS +SELECT cast(colA as int), cast(colB as tinyint), cast(colC as timestamp), 
cast(colD as smallint), cast(colE as smallint) FROM t11_csv_serde +); + +-- in this table, file having extra column is loaded +CREATE TABLE t2_multi_delimit(colA int, + colB tinyint, + colC timestamp, + colD smallint, + colE smallint) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe' +WITH SERDEPROPERTIES ("field.delim"="^,")STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH "../../data/files/t2_multi_delimit.csv" INTO TABLE t2_multi_delimit; + +SELECT * FROM t2_multi_delimit; + +-- in this table, delimiter of 5 characters is used +CREATE TABLE t3_multi_delimit(colA int, + colB tinyint, + colC timestamp, + colD