[hive] branch master updated: HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai)
This is an automated email from the ASF dual-hosted git repository. dengzh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 126e3da49cc HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai) 126e3da49cc is described below commit 126e3da49ccab22cce376ae8f9af76350ecdcd5c Author: dengzh AuthorDate: Wed Sep 13 12:04:12 2023 +0800 HIVE-27670: Failed to build the image locally on Apple silicon (#4680) (Zhihua Deng, reviewed by Ayush Saxena, Simhadri Govindappa, Zoltan Ratkai) --- packaging/src/docker/README.md | 5 + packaging/src/docker/build.sh | 2 +- packaging/src/docker/entrypoint.sh | 6 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/packaging/src/docker/README.md b/packaging/src/docker/README.md index e15de253368..c206914ef4d 100644 --- a/packaging/src/docker/README.md +++ b/packaging/src/docker/README.md @@ -72,6 +72,11 @@ For a quick start, launch the Metastore with Derby, --name metastore apache/hive:${HIVE_VERSION} ``` +NOTE: + +For Hive releases before 4.0, if you want to upgrade the existing external Metastore schema to the target version, +then add "--env SCHEMA_COMMAND=upgradeSchema" to the command. 
+ - HiveServer2 Launch the HiveServer2 with an embedded Metastore, diff --git a/packaging/src/docker/build.sh b/packaging/src/docker/build.sh index b5cbce78882..f13e3307f71 100755 --- a/packaging/src/docker/build.sh +++ b/packaging/src/docker/build.sh @@ -110,7 +110,7 @@ docker build \ "$WORK_DIR" \ -f "$WORK_DIR/Dockerfile" \ -t "$repo/hive:$HIVE_VERSION" \ ---build-arg "BUILD_ENV=unarchive" +--build-arg "BUILD_ENV=unarchive" \ --build-arg "HIVE_VERSION=$HIVE_VERSION" \ --build-arg "HADOOP_VERSION=$HADOOP_VERSION" \ --build-arg "TEZ_VERSION=$TEZ_VERSION" \ diff --git a/packaging/src/docker/entrypoint.sh b/packaging/src/docker/entrypoint.sh index 8f3c4bc564f..a19b50d8daa 100644 --- a/packaging/src/docker/entrypoint.sh +++ b/packaging/src/docker/entrypoint.sh @@ -24,7 +24,11 @@ set -x SKIP_SCHEMA_INIT="${IS_RESUME:-false}" function initialize_hive { - $HIVE_HOME/bin/schematool -dbType $DB_DRIVER -initOrUpgradeSchema + COMMAND="-initOrUpgradeSchema" + if [ "$(echo "$HIVE_VER" | cut -d '.' -f1)" -lt "4" ]; then + COMMAND="-${SCHEMA_COMMAND:-initSchema}" + fi + $HIVE_HOME/bin/schematool -dbType $DB_DRIVER $COMMAND if [ $? -eq 0 ]; then echo "Initialized schema successfully.." else
[hive] branch branch-3 updated: HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new fa4c8305be6 HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar) fa4c8305be6 is described below commit fa4c8305be64ecc9510ab2bc76d2413e9287597a Author: Diksha628 <43694846+diksha...@users.noreply.github.com> AuthorDate: Tue Sep 12 17:25:54 2023 +0530 HIVE-27666: Backport of HIVE-22903: Vectorized row_number() resets the row number after one batch in case of constant expression in partition clause (Shubham Chaurasia via Ramesh Kumar) Signed-off-by: Sankar Hariappan Closes (#4661) --- .../hive/ql/exec/vector/ptf/VectorPTFOperator.java | 4 +- .../clientpositive/vector_windowing_row_number.q | 75 ++ .../vector_windowing_row_number.q.out | 912 + 3 files changed, 989 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java index 39fab2cba2b..f401cf7faef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFOperator.java @@ -413,8 +413,8 @@ public class VectorPTFOperator extends Operator groupBatches.fillGroupResultsAndForward(this, batch); } -// If we are only processing a PARTITION BY, reset our evaluators. -if (!isPartitionOrderBy) { +// If we are only processing a PARTITION BY and isLastGroupBatch, reset our evaluators. 
+if (!isPartitionOrderBy && isLastGroupBatch) { groupBatches.resetEvaluators(); } } diff --git a/ql/src/test/queries/clientpositive/vector_windowing_row_number.q b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q new file mode 100644 index 000..673a9ad3d44 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_row_number.q @@ -0,0 +1,75 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table row_number_test; + +create table row_number_test as select explode(split(repeat("w,", 2400), ",")); + +insert into row_number_test select explode(split(repeat("x,", 1200), ",")); + +insert into row_number_test select explode(split(repeat("y,", 700), ",")); + +insert into row_number_test select explode(split(repeat("z,", 600), ",")); + +explain select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +create table row_numbers_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +SET hive.vectorized.execution.enabled=false; +SET hive.vectorized.execution.reduce.enabled=false; +set hive.vectorized.execution.ptf.enabled=false; + +explain select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by 
col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +create table row_numbers_non_vectorized as select +row_number() over() as r1, +row_number() over(order by col) r2, +row_number() over(partition by col) r3, +row_number() over(partition by col order by col) r4, +row_number() over(partition by 1 order by col) r5, +row_number() over(partition by col order by 2) r6, +row_number() over(partition by 1 order by 2) r7, +col +from row_number_test; + +-- compare results of vectorized with those of non-vectorized execution + +select exists( +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_vectorized +minus +select r1, r2, r3, r4, r5, r6, r7, col from row_numbers_non_vectorized +) diff_exists; + +drop table row_numbers_non_vectorized; +drop table row_numbers_vectorized; +drop table row_number_test;
[hive] branch branch-3 updated: HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3 by this push: new 597dc69a85e HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter) 597dc69a85e is described below commit 597dc69a85ec487983a2b12af8e29d24fc61ff04 Author: Diksha628 <43694846+diksha...@users.noreply.github.com> AuthorDate: Tue Sep 12 12:34:06 2023 +0530 HIVE-27388: Backport of HIVE-23058: Compaction task reattempt fails with FileAlreadyExistsException (Riju Trivedi, reviewed by Laszlo Pinter) Signed-off-by: Sankar Hariappan Closes (#4659) --- .../hive/ql/txn/compactor/TestCompactor.java | 60 +++--- .../hadoop/hive/ql/txn/compactor/CompactorMR.java | 13 - 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java index 0827bcdb695..c0cf05ea3d0 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java @@ -24,14 +24,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.Random; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -1602,6 +1595,57 @@ public class TestCompactor { 0L, 0L, 1); } + @Test + public void testCompactionForFileInSratchDir() throws Exception { +String dbName = 
"default"; +String tblName = "cfs"; +String columnNamesProperty = "a,b"; +String columnTypesProperty = "int:string"; +String createQuery = "CREATE TABLE " + tblName + "(a INT, b STRING) " + "STORED AS ORC TBLPROPERTIES ('transactional'='true'," ++ "'transactional_properties'='default')"; +executeStatementOnDriver("drop table if exists " + tblName, driver); +executeStatementOnDriver(createQuery, driver); + + + +// Insert some data -> this will generate only insert deltas +executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(1, 'foo')", driver); + +// Insert some data -> this will again generate only insert deltas +executeStatementOnDriver("INSERT INTO " + tblName + "(a,b) VALUES(2, 'bar')", driver); + +// Find the location of the table +IMetaStoreClient msClient = new HiveMetaStoreClient(conf); +Table table = msClient.getTable(dbName, tblName); +FileSystem fs = FileSystem.get(conf); + +Map tblProperties = new HashMap<>(); + tblProperties.put("compactor.hive.compactor.input.tmp.dir",table.getSd().getLocation() + "/" + "_tmp"); + +//Create empty file in ScratchDir under table location +String scratchDirPath = table.getSd().getLocation() + "/" + "_tmp"; +Path dir = new Path(scratchDirPath + "/base_002_v005"); +fs.mkdirs(dir); +Path emptyFile = AcidUtils.createBucketFile(dir, 0); +fs.create(emptyFile); + +//Run MajorCompaction +TxnStore txnHandler = TxnUtils.getTxnStore(conf); +Worker t = new Worker(); +t.setThreadId((int) t.getId()); +t.setConf(conf); +t.init(new AtomicBoolean(true), new AtomicBoolean()); +CompactionRequest Cr = new CompactionRequest(dbName, tblName, CompactionType.MAJOR); +Cr.setProperties(tblProperties); +txnHandler.compact(Cr); +t.run(); + +ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest()); +Assert.assertEquals(1, rsp.getCompacts().size()); +Assert.assertEquals(TxnStore.CLEANING_RESPONSE, rsp.getCompacts().get(0).getState()); + + } + @Test public void minorCompactWhileStreamingWithSplitUpdate() throws Exception 
{ String dbName = "default"; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index d7e661bcd26..e3ceb3af055 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -1028,7 +1028,18 @@ public class CompactorMR { AcidOutputFormat aof = instantiate(AcidOutputFormat.class, jobConf.get(OUTPUT_FORMAT_CLASS_NAME)); -writer = aof.getRawRecordWriter(new Path(jobConf.get(TMP_LOCATION)), options); +Path rootDir = new Path(jobConf.get(TMP_LOCATION)); +cleanupTmpLocationOnTaskRetry