[hive] branch master updated: HIVE-26170: Code cleanup in jdbc dataconnector (#3237)
This is an automated email from the ASF dual-hosted git repository. ngangam pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new a6ef9dc903f HIVE-26170: Code cleanup in jdbc dataconnector (#3237) a6ef9dc903f is described below commit a6ef9dc903f1e7c4ec53f925ab823f20f9331db8 Author: Butao Zhang <9760681+zhangbu...@users.noreply.github.com> AuthorDate: Wed Apr 27 00:08:34 2022 +0800 HIVE-26170: Code cleanup in jdbc dataconnector (#3237) --- .../hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java | 3 --- .../hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java | 2 -- .../metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java | 4 +--- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java index 5a8dac35240..f2ef01044e2 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java @@ -21,14 +21,11 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc; import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.api.DataConnector; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.List; public class DerbySQLConnectorProvider extends AbstractJDBCConnectorProvider { private static Logger LOG = LoggerFactory.getLogger(DerbySQLConnectorProvider.class); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java index b2450b7a547..6bb10b97b41 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java @@ -21,14 +21,12 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc; import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.api.DataConnector; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; -import java.util.List; public class MySQLConnectorProvider extends AbstractJDBCConnectorProvider { private static Logger LOG = LoggerFactory.getLogger(MySQLConnectorProvider.class); diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java index 2a9c86dfa26..b9214f562bd 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java @@ -21,16 +21,14 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc; import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.api.DataConnector; import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.Table; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.ResultSet; import java.sql.SQLException; -import java.util.List; public class PostgreSQLConnectorProvider extends AbstractJDBCConnectorProvider { - private static Logger LOG = LoggerFactory.getLogger(MySQLConnectorProvider.class); + private static Logger LOG = LoggerFactory.getLogger(PostgreSQLConnectorProvider.class); private static final String DRIVER_CLASS = "org.postgresql.Driver".intern(); public PostgreSQLConnectorProvider(String dbName, DataConnector dataConn) {
[hive] branch master updated: HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 59d462ad3e0 HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa) 59d462ad3e0 is described below commit 59d462ad3e023352ffbd57b4f2446e497a421252 Author: Zoltan Haindrich AuthorDate: Tue Apr 26 17:22:49 2022 +0200 HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa) --- .../hive/ql/optimizer/calcite/HiveCalciteUtil.java | 18 +- .../calcite/rules/HiveAntiSemiJoinRule.java| 23 +- .../queries/clientpositive/antijoin_conversion.q | 22 ++ .../clientpositive/llap/antijoin_conversion.q.out | 280 + 4 files changed, 339 insertions(+), 4 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index d925f159fba..160bfb86f6c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -630,7 +630,7 @@ public class HiveCalciteUtil { } }; - public static ImmutableList getPredsNotPushedAlready(RelNode inp, List predsToPushDown) { + public static ImmutableList getPredsNotPushedAlready(RelNode inp, List predsToPushDown) { return getPredsNotPushedAlready(Sets.newHashSet(), inp, predsToPushDown); } @@ -1238,6 +1238,22 @@ public class HiveCalciteUtil { return false; } + public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List expressions) { +List joinFields = joinRel.getRowType().getFieldList(); +int nTotalFields = joinFields.size(); +List leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList(); +int nFieldsLeft = leftFields.size(); +ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields); + +for (RexNode node : expressions) { + ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node); + if (!rightBitmap.contains(inputBits)) { +return false; + } +} +return true; + } + /** * Extracts inputs referenced by aggregate operator. */ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java index 14a64c3d75c..3697ec2c4aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java @@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.Strong; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexVisitorImpl; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; @@ -36,8 +39,7 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; +import java.util.concurrent.atomic.AtomicBoolean; /** * Planner rule that converts a join plus filter to anti join. @@ -136,7 +138,8 @@ public class HiveAntiSemiJoinRule extends RelOptRule { for (RexNode filterNode : aboveFilters) { if (filterNode.getKind() == SqlKind.IS_NULL) { // Null filter from right side table can be removed and its a pre-condition for anti join conversion. -if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) { +if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode)) +&& isStrong(((RexCall) filterNode).getOperands().get(0))) { hasNullFilterOnRightSide = true; } else { filterList.add(filterNode); @@ -157,4 +160,18 @@ public class HiveAntiSemiJoinRule extends RelOptRule { } return filterList; } + + private boolean isStrong(RexNode rexNode) { +AtomicBoolean hasCast = new
[hive] branch master updated: HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
This is an automated email from the ASF dual-hosted git repository. lpinter pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new dbdcf00dd63 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod) dbdcf00dd63 is described below commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e Author: László Pintér <4102+lcspin...@users.noreply.github.com> AuthorDate: Tue Apr 26 16:06:55 2022 +0200 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod) --- .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 18 ++ .../hive/HiveIcebergStorageHandlerWithEngineBase.java | 3 +-- .../apache/iceberg/mr/hive/TestHiveIcebergSelects.java | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index f23b0d16c10..6fdddb9b343 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -78,6 +78,7 @@ import org.apache.hadoop.mapred.JobID; import org.apache.hadoop.mapred.JobStatus; import org.apache.hadoop.mapred.OutputCommitter; import org.apache.hadoop.mapred.OutputFormat; +import org.apache.iceberg.FileFormat; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.PartitionSpecParser; import org.apache.iceberg.Schema; @@ -167,14 +168,14 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H public void configureInputJobProperties(TableDesc tableDesc, Map map) { overlayTableProperties(conf, tableDesc, map); // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables -fallbackToNonVectorizedModeForV2(tableDesc.getProperties()); +fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties()); } @Override public void configureOutputJobProperties(TableDesc tableDesc, Map map) { overlayTableProperties(conf, tableDesc, map); // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables -fallbackToNonVectorizedModeForV2(tableDesc.getProperties()); +fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties()); // For Tez, setting the committer here is enough to make sure it'll be part of the jobConf map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName()); // For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there @@ -744,8 +745,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H return column; } - private void fallbackToNonVectorizedModeForV2(Properties tableProps) { -if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) { + /** + * If any of the following checks is true we fall back to non vectorized mode: + * + * iceberg format-version is "2" + * fileformat is set to avro + * + * @param tableProps table properties, must be not null + */ + private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) { +if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) || + FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) { conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false); } } diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java index 95f03cdade0..6de80dfd32e 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java @@ -112,8 +112,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase { if (javaVersion.equals("1.8")) { testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false}); // test for vectorization=ON in case of ORC and PARQUET format with Tez engine - if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) && - "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) { + if (fileFormat != FileFormat.METADATA && "tez".equals(engine) &&
[hive] branch master updated: HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238)
This is an automated email from the ASF dual-hosted git repository. pvary pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git The following commit(s) were added to refs/heads/master by this push: new 3c5613fa88f HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238) 3c5613fa88f is described below commit 3c5613fa88f35f81df944b241d95a6f78ef71d7d Author: Butao Zhang <9760681+zhangbu...@users.noreply.github.com> AuthorDate: Tue Apr 26 16:42:09 2022 +0800 HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238) --- .../src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java | 7 +++ 1 file changed, 7 insertions(+) diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java index 1f8365e3140..32ed701b03b 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java @@ -6234,6 +6234,13 @@ public class HMSHandler extends FacebookBase implements IHMSHandler { List ret = null; Exception ex = null; String[] parsedDbName = parseDbName(dbname, conf); +try { + if (isDatabaseRemote(dbname)) { +Database db = get_database_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME]); +return DataConnectorProviderFactory.getDataConnectorProvider(db).getTableNames(); + } +} catch (Exception e) { /* ignore */ } + try { ret = getMS().getAllTables(parsedDbName[CAT_NAME], parsedDbName[DB_NAME]); ret = FilterUtils.filterTableNamesIfEnabled(isServerFilterEnabled, filterHook,