[hive] branch master updated: HIVE-25528: Avoid recalculating types after CBO on second AST pass (#2722) (Stephen Carlin reviewed by Alessandro Solimando, Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 50f54d2 HIVE-25528: Avoid recalculating types after CBO on second AST pass (#2722) (Stephen Carlin reviewed by Alessandro Solimando, Zoltan Haindrich) 50f54d2 is described below commit 50f54d24d4fc367ced045b9b8f25b2a0e358634e Author: scarlin-cloudera <55709772+scarlin-cloud...@users.noreply.github.com> AuthorDate: Wed Oct 27 10:58:20 2021 -0700 HIVE-25528: Avoid recalculating types after CBO on second AST pass (#2722) (Stephen Carlin reviewed by Alessandro Solimando, Zoltan Haindrich) At compile time, Hive parses the query into ASTNodes. For CBO, the ASTNodes get converted into Calcite RexNodes and then get converted back into ASTNodes. After the optimization step is done, the types chosen for each step should be the final type. This commit eliminates any type changes done on the conversion back to the ASTNode. A couple of ptest files changed with this commit. They changed because an extra constant fold was previously done on the conversion back to the ASTNode. This constant fold should have been done at optimization time. A Jira will be filed for this, but this issue has no adverse effect. 
--- parser/pom.xml | 5 + .../org/apache/hadoop/hive/ql/parse/ASTNode.java | 13 + .../optimizer/calcite/translator/ASTConverter.java | 4 ++-- .../calcite/translator/SqlFunctionConverter.java | 4 +++- .../hive/ql/parse/type/TypeCheckProcFactory.java | 17 + .../hive/ql/udf/generic/GenericUDFBaseNumeric.java | 22 +- .../perf/tpcds30tb/tez/query23.q.out | 4 ++-- .../clientpositive/perf/tpcds30tb/tez/query6.q.out | 4 ++-- 8 files changed, 57 insertions(+), 16 deletions(-) diff --git a/parser/pom.xml b/parser/pom.xml index 41fee3b..b02476a 100644 --- a/parser/pom.xml +++ b/parser/pom.xml @@ -62,6 +62,11 @@ ${junit.version} test + + org.apache.hive + hive-serde + ${project.version} + diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java b/parser/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java index f51de08..802872f 100644 --- a/parser/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java +++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java @@ -33,6 +33,7 @@ import org.antlr.runtime.tree.Tree; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * @@ -46,6 +47,10 @@ public class ASTNode extends CommonTree implements Node,Serializable { private transient ASTNode rootNode; private transient boolean isValidASTStr; private transient boolean visited = false; + // At parsing type, the typeInfo isn't known. However, Hive has logic that converts + // the CBO plan back into ASTNode objects, and at this point, the typeInfo has + // been calculated by the optimizer. 
+ private transient TypeInfo typeInfo; private static final Interner TOKEN_CACHE = Interners.newWeakInterner(); @@ -155,6 +160,14 @@ public class ASTNode extends CommonTree implements Node,Serializable { this.origin = origin; } + public void setTypeInfo(TypeInfo typeInfo) { +this.typeInfo = typeInfo; + } + + public TypeInfo getTypeInfo() { +return typeInfo; + } + public String dump() { StringBuilder sb = new StringBuilder("\n"); dump(sb); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java index 7073bca..78ecd17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java @@ -803,7 +803,7 @@ public class ASTConverter { astNodeLst.add(operand.accept(this)); } return SqlFunctionConverter.buildAST(SqlStdOperatorTable.NOT, - Collections.singletonList(SqlFunctionConverter.buildAST(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, astNodeLst))); + Collections.singletonList(SqlFunctionConverter.buildAST(SqlStdOperatorTable.IS_NOT_DISTINCT_FROM, astNodeLst, call.getType())), call.getType()); case CAST: assert(call.getOperands().size() == 1); if (call.getType().isStruct() || @@ -850,7 +850,7 @@ public class ASTConverter { if (isFlat(call)) { return SqlFunctionConverter.buildAST(op, astNodeLst, 0); } else { -return SqlFunctionConverter.buildAST(op, astNodeLst); +
[hive] branch master updated: HIVE-25630: Transformer fixes (#2738) (Zoltan Haindrich reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 9a51d84 HIVE-25630: Transformer fixes (#2738) (Zoltan Haindrich reviewed by Krisztian Kasa) 9a51d84 is described below commit 9a51d84ad3dfb4628078ecd57c8cedbfbc2e4efe Author: Zoltan Haindrich AuthorDate: Wed Oct 27 16:22:24 2021 +0200 HIVE-25630: Transformer fixes (#2738) (Zoltan Haindrich reviewed by Krisztian Kasa) --- .../translated_external_createexisting.q | 13 ++ .../clientpositive/translated_external_alter.q | 8 ++ .../clientpositive/translated_external_rename3.q | 26 .../translated_external_createexisting.q.out | 50 +++ .../llap/translated_external_alter.q.out | 16 +++ .../llap/translated_external_rename3.q.out | 154 + .../hadoop/hive/metastore/conf/MetastoreConf.java | 7 +- .../hive/metastore/utils/MetaStoreUtils.java | 2 +- .../hadoop/hive/metastore/ExceptionHandler.java| 2 +- .../apache/hadoop/hive/metastore/HMSHandler.java | 5 + .../metastore/MetastoreDefaultTransformer.java | 12 +- .../hive/metastore/TestMetastoreTransformer.java | 141 +++ .../client/TestTablesCreateDropAlterTruncate.java | 1 + 13 files changed, 430 insertions(+), 7 deletions(-) diff --git a/ql/src/test/queries/clientnegative/translated_external_createexisting.q b/ql/src/test/queries/clientnegative/translated_external_createexisting.q new file mode 100644 index 000..c2dab16 --- /dev/null +++ b/ql/src/test/queries/clientnegative/translated_external_createexisting.q @@ -0,0 +1,13 @@ +set metastore.metadata.transformer.class=org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer; +set metastore.metadata.transformer.location.mode=prohibit; + +set hive.fetch.task.conversion=none; +set hive.compute.query.using.stats=false; + +create table t (a integer); + +-- table should be translated +desc formatted t; + +create table t (a integer); + diff 
--git a/ql/src/test/queries/clientpositive/translated_external_alter.q b/ql/src/test/queries/clientpositive/translated_external_alter.q new file mode 100644 index 000..7010782 --- /dev/null +++ b/ql/src/test/queries/clientpositive/translated_external_alter.q @@ -0,0 +1,8 @@ +set metastore.metadata.transformer.class=org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer; +set metastore.metadata.transformer.location.mode=seqsuffix; + +set hive.fetch.task.conversion=none; +set hive.compute.query.using.stats=false; + +create table caseSensitive (a integer); +alter table casesEnsitivE set tblproperties('some'='one'); diff --git a/ql/src/test/queries/clientpositive/translated_external_rename3.q b/ql/src/test/queries/clientpositive/translated_external_rename3.q new file mode 100644 index 000..7ccce0d --- /dev/null +++ b/ql/src/test/queries/clientpositive/translated_external_rename3.q @@ -0,0 +1,26 @@ +set metastore.metadata.transformer.class=org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer; +set metastore.metadata.transformer.location.mode=force; + +set hive.fetch.task.conversion=none; +set hive.compute.query.using.stats=false; + +create external table t (a integer); +insert into t values(1); +alter table t rename to t2; + +-- this TRANSLATED table will have its location shared with the pre-existing t2 table +create table t (a integer); +insert into t values(2); + +-- the rows from bot T and T2 can be seen from both tables +select assert_true(count(1) = 2) from t; +select assert_true(count(1) = 2) from t2; + +select * from t; +select * from t2; + +-- the location of both T and T2 is the same +desc formatted t; +desc formatted t2; + + diff --git a/ql/src/test/results/clientnegative/translated_external_createexisting.q.out b/ql/src/test/results/clientnegative/translated_external_createexisting.q.out new file mode 100644 index 000..3550da4 --- /dev/null +++ b/ql/src/test/results/clientnegative/translated_external_createexisting.q.out @@ -0,0 +1,50 @@ 
+PREHOOK: query: create table t (a integer) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (a integer) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: desc formatted t +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@t +POSTHOOK: query: desc formatted t +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@t +# col_name data_type comment +a int + +# Detailed Table Information +Database: default + A masked pattern was here +Retention: 0 + A masked pattern was here
[hive] branch master updated (9ed1d1e -> f749ef2)
This is an automated email from the ASF dual-hosted git repository. lpinter pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/hive.git. from 9ed1d1e HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan) add f749ef2 HIVE-25642 Log a warning if multiple Compaction Worker versions are running compactions (#2743) (Viktor Csomor, reviewed by Laszlo Pinter) No new revisions were added by this update. Summary of changes: .../hadoop/hive/metastore/conf/MetastoreConf.java | 6 + .../hive/metastore/metrics/AcidMetricService.java | 41 ++- .../TestMultipleWorkerVersionDetection.java| 123 + 3 files changed, 165 insertions(+), 5 deletions(-) create mode 100644 standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/metrics/TestMultipleWorkerVersionDetection.java
[hive] branch branch-3.1 updated: HIVE-25600: Compaction job creates redundant base/delta folder within base/delta folder (Nikhil Gupta, reviewed by Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/branch-3.1 by this push: new ce5e13d HIVE-25600: Compaction job creates redundant base/delta folder within base/delta folder (Nikhil Gupta, reviewed by Sankar Hariappan) ce5e13d is described below commit ce5e13da3554df8538d46e10dce87b4ef04c3119 Author: guptanikhil007 AuthorDate: Wed Oct 27 13:36:10 2021 +0530 HIVE-25600: Compaction job creates redundant base/delta folder within base/delta folder (Nikhil Gupta, reviewed by Sankar Hariappan) Signed-off-by: Sankar Hariappan Closes (#2705) --- .../org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java | 11 +++ 1 file changed, 11 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java index 95870ad..474f6c5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java @@ -1184,6 +1184,16 @@ public class CompactorMR { Path tmpPath = fileStatus.getPath(); //newPath is the base/delta dir Path newPath = new Path(finalLocation, tmpPath.getName()); +/* rename(A, B) has "interesting" behavior if A and B are directories. If B doesn't exist, +* it does the expected operation and everything that was in A is now in B. If B exists, +* it will make A a child of B. +* This issue can happen if the previous MR job succeeded but HMS was unable to persist compaction result. +* We will delete the directory B if it exists to avoid the above issue +*/ +if (fs.exists(newPath)) { + LOG.info(String.format("Final path %s already exists. 
Deleting the path to avoid redundant base creation", newPath.toString())); + fs.delete(newPath, true); +} /* Create the markers in the tmp location and rename everything in the end to prevent race condition between * marker creation and split read. */ AcidUtils.OrcAcidVersion.writeVersionFile(tmpPath, fs); @@ -1192,6 +1202,7 @@ public class CompactorMR { } fs.delete(tmpLocation, true); } + private void createCompactorMarker(JobConf conf, Path finalLocation, FileSystem fs) throws IOException { if(conf.getBoolean(IS_MAJOR, false)) {
[hive] branch master updated: HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository. sankarh pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 9ed1d1e HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan) 9ed1d1e is described below commit 9ed1d1ed720196d25f6ad65c1964a8a6924ce9d6 Author: Sruthi Mooriyathvariam AuthorDate: Wed Oct 27 13:24:05 2021 +0530 HIVE-25553: Support Map data-type natively in Arrow format (Sruthi Mooriyathvariam, reviewed by Sankar Hariappan) This covers the following sub-tasks: HIVE-25554: Upgrade arrow version to 0.15 HIVE-25555: ArrowColumnarBatchSerDe should store map natively instead of converting to list a. Upgrading arrow version to version 0.15.0 (where map data-type is supported) b. Modifying ArrowColumnarBatchSerDe and corresponding Serializer/Deserializer to not use list as a workaround for map and use the arrow map data-type instead c. 
Taking care of creating non-nullable struct and non-nullable key type for the map data-type in ArrowColumnarBatchSerDe Signed-off-by: Sankar Hariappan Closes (#2751) --- data/files/datatypes.txt | 4 +- .../org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java | 16 +++-- .../java/org/apache/hive/jdbc/TestJdbcDriver2.java | 8 +-- .../hive/jdbc/TestJdbcWithMiniLlapArrow.java | 83 -- .../hive/jdbc/TestJdbcWithMiniLlapVectorArrow.java | 83 -- .../apache/hive/jdbc/cbo_rp_TestJdbcDriver2.java | 8 +-- pom.xml| 2 +- .../hive/llap/WritableByteChannelAdapter.java | 2 +- .../hive/ql/io/arrow/ArrowColumnarBatchSerDe.java | 17 ++--- .../apache/hadoop/hive/ql/io/arrow/Serializer.java | 42 +++ .../hadoop/hive/ql/io/arrow/TestSerializer.java| 18 ++--- 11 files changed, 160 insertions(+), 123 deletions(-) diff --git a/data/files/datatypes.txt b/data/files/datatypes.txt index 0872a1f..38f8d29 100644 --- a/data/files/datatypes.txt +++ b/data/files/datatypes.txt @@ -1,3 +1,3 @@ \N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N --1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N\N\N -1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.123456YWJjZA==2013-01-01abc123abc123X'01FF' +-1false-1.11\Nab\N\N\N-1-1-1.0-1110100\N\N\N\N\N\N\N\N\N +1true1.11121x2ykvbca92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.123456YWJjZA==2013-01-01abc123abc123X'01FF' \ No newline at end of file diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java index 20682ff..2ec3d48 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/BaseJdbcWithMiniLlap.java @@ -499,10 +499,12 @@ public abstract class BaseJdbcWithMiniLlap { assertEquals(0, c5Value.size()); Map c6Value = (Map) rowValues[5]; -assertEquals(0, c6Value.size()); +assertEquals(1, c6Value.size()); 
+assertEquals(null, c6Value.get(1)); Map c7Value = (Map) rowValues[6]; -assertEquals(0, c7Value.size()); +assertEquals(1, c7Value.size()); +assertEquals("b", c7Value.get("a")); List c8Value = (List) rowValues[7]; assertEquals(null, c8Value.get(0)); @@ -518,7 +520,10 @@ public abstract class BaseJdbcWithMiniLlap { assertEquals(0, c13Value.size()); Map c14Value = (Map) rowValues[13]; -assertEquals(0, c14Value.size()); +assertEquals(1, c14Value.size()); +Map mapVal = (Map) c14Value.get(Integer.valueOf(1)); +assertEquals(1, mapVal.size()); +assertEquals(100, mapVal.get(Integer.valueOf(10))); List c15Value = (List) rowValues[14]; assertEquals(null, c15Value.get(0)); @@ -553,8 +558,9 @@ public abstract class BaseJdbcWithMiniLlap { assertEquals("y", c6Value.get(Integer.valueOf(2))); c7Value = (Map) rowValues[6]; -assertEquals(1, c7Value.size()); +assertEquals(2, c7Value.size()); assertEquals("v", c7Value.get("k")); +assertEquals("c", c7Value.get("b")); c8Value = (List) rowValues[7]; assertEquals("a", c8Value.get(0)); @@ -577,7 +583,7 @@ public abstract class BaseJdbcWithMiniLlap { c14Value = (Map) rowValues[13]; assertEquals(2, c14Value.size()); -Map mapVal = (Map) c14Value.get(Integer.valueOf(1)); +mapVal = (Map) c14Value.get(Integer.valueOf(1)); assertEquals(2, mapVal.size()); assertEquals(Integer.valueOf(12), mapVal.get(Integer.valueOf(11)));
[hive] branch master updated: HIVE-25633: Prevent shutdown of MetaStore scheduled worker ThreadPool (reviewed by Eugene Chung and Krisztian Kasa) (#2737)
This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new bdab34c HIVE-25633: Prevent shutdown of MetaStore scheduled worker ThreadPool (reviewed by Eugene Chung and Krisztian Kasa) (#2737) bdab34c is described below commit bdab34c32f9ebba3a9f8efe40adab5a47d75af99 Author: Zoltan Haindrich AuthorDate: Wed Oct 27 09:19:34 2021 +0200 HIVE-25633: Prevent shutdown of MetaStore scheduled worker ThreadPool (reviewed by Eugene Chung and Krisztian Kasa) (#2737) --- .../src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java | 1 - 1 file changed, 1 deletion(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java index 7c46857..a75091b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java @@ -991,7 +991,6 @@ public class HMSHandler extends FacebookBase implements IHMSHandler { public void shutdown() { cleanupRawStore(); PerfLogger.getPerfLogger(false).cleanupPerfLogMetrics(); -ThreadPool.shutdown(); } @Override