[hive] branch master updated: HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai)

2023-09-12 Thread dengzh
This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 126e3da49cc HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai)
126e3da49cc is described below

commit 126e3da49ccab22cce376ae8f9af76350ecdcd5c
Author: dengzh 
AuthorDate: Wed Sep 13 12:04:12 2023 +0800

HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai)
---
 packaging/src/docker/README.md | 5 +
 packaging/src/docker/build.sh  | 2 +-
 packaging/src/docker/entrypoint.sh | 6 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/packaging/src/docker/README.md b/packaging/src/docker/README.md
index e15de253368..c206914ef4d 100644
--- a/packaging/src/docker/README.md
+++ b/packaging/src/docker/README.md
@@ -72,6 +72,11 @@ For a quick start, launch the Metastore with Derby,
 --name metastore apache/hive:${HIVE_VERSION}
   ```
 
+NOTE:
+
+For Hive releases before 4.0, if you want to upgrade the existing external Metastore schema to the target version,
+then add "--env SCHEMA_COMMAND=upgradeSchema" to the command.
+
 - HiveServer2
 
 Launch the HiveServer2 with an embedded Metastore,
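
As a usage sketch (not part of the commit): combining the README's quick-start command with the new SCHEMA_COMMAND variable might look like the following; the DB_DRIVER value and the port mapping are illustrative assumptions, not taken from the patch.

    # Hedged sketch: upgrade an existing external Metastore schema with a
    # pre-4.0 image. DB_DRIVER and the port mapping are placeholders.
    docker run -d -p 9083:9083 \
      --env DB_DRIVER=postgres \
      --env SCHEMA_COMMAND=upgradeSchema \
      --name metastore apache/hive:${HIVE_VERSION}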
diff --git a/packaging/src/docker/build.sh b/packaging/src/docker/build.sh
index b5cbce78882..f13e3307f71 100755
--- a/packaging/src/docker/build.sh
+++ b/packaging/src/docker/build.sh
@@ -110,7 +110,7 @@ docker build \
 "$WORK_DIR" \
 -f "$WORK_DIR/Dockerfile" \
 -t "$repo/hive:$HIVE_VERSION" \
---build-arg "BUILD_ENV=unarchive"
+--build-arg "BUILD_ENV=unarchive" \
 --build-arg "HIVE_VERSION=$HIVE_VERSION" \
 --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \
 --build-arg "TEZ_VERSION=$TEZ_VERSION" \
diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh
index 8f3c4bc564f..a19b50d8daa 100644
--- a/packaging/src/docker/entrypoint.sh
+++ b/packaging/src/docker/entrypoint.sh
@@ -24,7 +24,11 @@ set -x
 SKIP_SCHEMA_INIT="${IS_RESUME:-false}"
 
 function initialize_hive {
-  $HIVE_HOME/bin/schematool -dbType $DB_DRIVER -initOrUpgradeSchema
+  COMMAND="-initOrUpgradeSchema"
+  if [ "$(echo "$HIVE_VER" | cut -d '.' -f1)" -lt "4" ]; then
+    COMMAND="-${SCHEMA_COMMAND:-initSchema}"
+  fi
+  $HIVE_HOME/bin/schematool -dbType $DB_DRIVER $COMMAND
   if [ $? -eq 0 ]; then
     echo "Initialized schema successfully.."
   else
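
For clarity (an illustration, not part of the patch): the new gate resolves the schematool flag from the major version in HIVE_VER and the optional SCHEMA_COMMAND variable. A standalone sketch with made-up values:

    # Illustrative values only; mirrors the gate added above.
    HIVE_VER="3.1.3"
    SCHEMA_COMMAND="upgradeSchema"
    COMMAND="-initOrUpgradeSchema"
    if [ "$(echo "$HIVE_VER" | cut -d '.' -f1)" -lt "4" ]; then
      COMMAND="-${SCHEMA_COMMAND:-initSchema}"   # defaults to -initSchema when unset
    fi
    echo "$COMMAND"   # -upgradeSchema here; -initOrUpgradeSchema on 4.x and later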



[hive] branch branch-3 updated: HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar)

2023-09-12 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new fa4c8305be6 HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar)
fa4c8305be6 is described below

commit fa4c8305be64ecc9510ab2bc76d2413e9287597a
Author: Diksha628 <43694846+diksha...@users.noreply.github.com>
AuthorDate: Tue Sep 12 17:25:54 2023 +0530

HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar)

Signed-off-by: Sankar Hariappan 
Closes (#4661)
---
 .../hive/ql/exec/vector/ptf/VectorPTFOperator.java |   4 +-
 .../clientpositive/vector_windowing_row_number.q   |  75 ++
 .../vector_windowing_row_number.q.out  | 912 +
 3 files changed, 989 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java
index 39fab2cba2b..f401cf7faef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java
@@ -413,8 +413,8 @@ public class VectorPTFOperator extends Operator<VectorPTFDesc>
   groupBatches.fillGroupResultsAndForward(this, batch);
 }
 
-// If we are only processing a PARTITION BY, reset our evaluators.
-if (!isPartitionOrderBy) {
+// If we are only processing a PARTITION BY and isLastGroupBatch, reset our evaluators.
+if (!isPartitionOrderBy && isLastGroupBatch) {
   groupBatches.resetEvaluators();
 }
   }
diff --git a/ql/src/test/queries/clientpositive/vector_windowing_row_number.q b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q
new file mode 100644
index 000..673a9ad3d44
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q
@@ -0,0 +1,75 @@
+set hive.cli.print.header=true;
+SET hive.vectorized.execution.enabled=true;
+SET hive.vectorized.execution.reduce.enabled=true;
+set hive.vectorized.execution.ptf.enabled=true;
+set hive.fetch.task.conversion=none;
+
+drop table row_number_test;
+
+create table row_number_test as select explode(split(repeat("w,", 2400), ","));
+
+insert into row_number_test select explode(split(repeat("x,", 1200), ","));
+
+insert into row_number_test select explode(split(repeat("y,", 700), ","));
+
+insert into row_number_test select explode(split(repeat("z,", 600), ","));
+
+explain select
+row_number() over() as r1,
+row_number() over(order by col) r2,
+row_number() over(partition by col) r3,
+row_number() over(partition by col order by col) r4,
+row_number() over(partition by 1 order by col) r5,
+row_number() over(partition by col order by 2) r6,
+row_number() over(partition by 1 order by 2) r7,
+col
+from row_number_test;
+
+create table row_numbers_vectorized as select
+row_number() over() as r1,
+row_number() over(order by col) r2,
+row_number() over(partition by col) r3,
+row_number() over(partition by col order by col) r4,
+row_number() over(partition by 1 order by col) r5,
+row_number() over(partition by col order by 2) r6,
+row_number() over(partition by 1 order by 2) r7,
+col
+from row_number_test;
+
+SET hive.vectorized.execution.enabled=false;
+SET hive.vectorized.execution.reduce.enabled=false;
+set hive.vectorized.execution.ptf.enabled=false;
+
+explain select
+row_number() over() as r1,
+row_number() over(order by col) r2,
+row_number() over(partition by col) r3,
+row_number() over(partition by col order by col) r4,
+row_number() over(partition by 1 order by col) r5,
+row_number() over(partition by col order by 2) r6,
+row_number() over(partition by 1 order by 2) r7,
+col
+from row_number_test;
+
+create table row_numbers_non_vectorized as select
+row_number() over() as r1,
+row_number() over(order by col) r2,
+row_number() over(partition by col) r3,
+row_number() over(partition by col order by col) r4,
+row_number() over(partition by 1 order by col) r5,
+row_number() over(partition by col order by 2) r6,
+row_number() over(partition by 1 order by 2) r7,
+col
+from row_number_test;
+
+-- compare results of vectorized with those of non-vectorized execution
+
+select exists(
+select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized
+minus
+select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized
+) diff_exists;
+
+drop table row_numbers_non_vectorized;
+drop table row_numbers_vectorized;
+drop table row_number_test;
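
As a usage note (an assumption, not part of the commit): once the two result tables exist, the final comparison can also be run ad hoc, e.g. via beeline against a local HiveServer2.

    # Hedged sketch; the JDBC URL is a placeholder for your HiveServer2.
    beeline -u jdbc:hive2://localhost:10000/default -e "
      select exists(
        select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized
        minus
        select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized
      ) diff_exists;"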

[hive] branch branch-3 updated: HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter)

2023-09-12 Thread sankarh
This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
 new 597dc69a85e HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter)
597dc69a85e is described below

commit 597dc69a85ec487983a2b12af8e29d24fc61ff04
Author: Diksha628 <43694846+diksha...@users.noreply.github.com>
AuthorDate: Tue Sep 12 12:34:06 2023 +0530

HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter)

Signed-off-by: Sankar Hariappan 
Closes (#4659)
---
 .../hive/ql/txn/compactor/TestCompactor.java   | 60 +++---
 .../hadoop/hive/ql/txn/compactor/CompactorMR.java  | 13 -
 2 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index 0827bcdb695..c0cf05ea3d0 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -24,14 +24,7 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.SortedSet;
-import java.util.TreeSet;
+import java.util.*;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -1602,6 +1595,57 @@ public class TestCompactor {
   0L, 0L, 1);
   }
 
+  @Test
+  public void testCompactionForFileInSratchDir() throws Exception {
+String dbName = "default";
+String tblName = "cfs";
+String columnNamesProperty = "a,b";
+String columnTypesProperty = "int:string";
+String createQuery = "CREATE TABLE " + tblName + "(a INT, b STRING) " + "STORED AS ORC  TBLPROPERTIES ('transactional'='true',"
++ "'transactional_properties'='default')";
+executeStatementOnDriver("drop table if exists " + tblName, driver);
+executeStatementOnDriver(createQuery, driver);
+
+
+
+// Insert some data -> this will generate only insert deltas
+executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 
'foo')", driver);
+
+// Insert some data -> this will again generate only insert deltas
+executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 
'bar')", driver);
+
+// Find the location of the table
+IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
+Table table = msClient.getTable(dbName, tblName);
+FileSystem fs = FileSystem.get(conf);
+
+Map<String, String> tblProperties = new HashMap<>();
+tblProperties.put("compactor.hive.compactor.input.tmp.dir",table.getSd().getLocation() + "/" + "_tmp");
+
+//Create empty file in ScratchDir under table location
+String scratchDirPath = table.getSd().getLocation() + "/" + "_tmp";
+Path dir = new Path(scratchDirPath + "/base_002_v005");
+fs.mkdirs(dir);
+Path emptyFile = AcidUtils.createBucketFile(dir, 0);
+fs.create(emptyFile);
+
+//Run MajorCompaction
+TxnStore txnHandler = TxnUtils.getTxnStore(conf);
+Worker t = new Worker();
+t.setThreadId((int) t.getId());
+t.setConf(conf);
+t.init(new AtomicBoolean(true), new AtomicBoolean());
+CompactionRequest Cr = new CompactionRequest(dbName, tblName, CompactionType.MAJOR);
+Cr.setProperties(tblProperties);
+txnHandler.compact(Cr);
+t.run();
+
+ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+Assert.assertEquals(1, rsp.getCompacts().size());
+Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState());
+
+  }
+
   @Test
   public void minorCompactWhileStreamingWithSplitUpdate() throws Exception {
 String dbName = "default";
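
For context (a sketch under stated assumptions, not part of the patch): the stale-file condition the new test constructs can be mimicked from a shell; TABLE_LOC is a placeholder, and bucket_00000 is the name AcidUtils.createBucketFile(dir, 0) yields for bucket 0.

    # Recreate a leftover compactor tmp file by hand; paths are illustrative.
    TABLE_LOC=/warehouse/tablespace/managed/hive/cfs
    hdfs dfs -mkdir -p "$TABLE_LOC/_tmp/base_002_v005"
    hdfs dfs -touchz "$TABLE_LOC/_tmp/base_002_v005/bucket_00000"
    # Before this fix, a compaction task retry writing under _tmp could fail
    # with FileAlreadyExistsException; CompactorMR now cleans up the tmp
    # location on task retry.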
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index d7e661bcd26..e3ceb3af055 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -1028,7 +1028,18 @@ public class CompactorMR {
 AcidOutputFormat aof =
 instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME));
 
-writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options);
+Path rootDir = new Path(jobConf.get(TMP_LOCATION));
+cleanupTmpLocationOnTaskRetry