[hive] branch master updated: HIVE-26170: Code cleanup in jdbc dataconnector (#3237)

2022-04-26 Thread ngangam
This is an automated email from the ASF dual-hosted git repository.

ngangam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new a6ef9dc903f HIVE-26170: Code cleanup in jdbc dataconnector (#3237)
a6ef9dc903f is described below

commit a6ef9dc903f1e7c4ec53f925ab823f20f9331db8
Author: Butao Zhang <9760681+zhangbu...@users.noreply.github.com>
AuthorDate: Wed Apr 27 00:08:34 2022 +0800

HIVE-26170: Code cleanup in jdbc dataconnector (#3237)
---
 .../hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java  | 3 ---
 .../hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java | 2 --
 .../metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java | 4 +---
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java
index 5a8dac35240..f2ef01044e2 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/DerbySQLConnectorProvider.java
@@ -21,14 +21,11 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc;
 import org.apache.hadoop.hive.metastore.ColumnType;
 import org.apache.hadoop.hive.metastore.api.DataConnector;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Table;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
 import java.sql.ResultSet;
 import java.sql.SQLException;
-import java.util.List;
 
 public class DerbySQLConnectorProvider extends AbstractJDBCConnectorProvider {
   private static Logger LOG = LoggerFactory.getLogger(DerbySQLConnectorProvider.class);
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java
index b2450b7a547..6bb10b97b41 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/MySQLConnectorProvider.java
@@ -21,14 +21,12 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc;
 import org.apache.hadoop.hive.metastore.ColumnType;
 import org.apache.hadoop.hive.metastore.api.DataConnector;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Table;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
-import java.util.List;
 
 public class MySQLConnectorProvider extends AbstractJDBCConnectorProvider {
   private static Logger LOG = LoggerFactory.getLogger(MySQLConnectorProvider.class);
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java
index 2a9c86dfa26..b9214f562bd 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/dataconnector/jdbc/PostgreSQLConnectorProvider.java
@@ -21,16 +21,14 @@ package org.apache.hadoop.hive.metastore.dataconnector.jdbc;
 import org.apache.hadoop.hive.metastore.ColumnType;
 import org.apache.hadoop.hive.metastore.api.DataConnector;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.Table;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.sql.ResultSet;
 import java.sql.SQLException;
-import java.util.List;
 
 public class PostgreSQLConnectorProvider extends AbstractJDBCConnectorProvider {
-  private static Logger LOG = LoggerFactory.getLogger(MySQLConnectorProvider.class);
+  private static Logger LOG = LoggerFactory.getLogger(PostgreSQLConnectorProvider.class);
   private static final String DRIVER_CLASS = "org.postgresql.Driver".intern();
 
   public PostgreSQLConnectorProvider(String dbName, DataConnector dataConn) {
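
These providers back Hive's JDBC data connectors, so the change is pure cleanup: unused imports dropped, plus the one real fix above, where PostgreSQLConnectorProvider had been obtaining its logger under MySQLConnectorProvider's name and so logged under the wrong category. For context, a hedged sketch of the DDL that exercises one of these providers (connector name, URL, and credentials are illustrative, not from this commit):

  -- Registers a connector handled by PostgreSQLConnectorProvider.
  CREATE CONNECTOR pg_conn
  TYPE 'postgres'
  URL 'jdbc:postgresql://localhost:5432/testdb'
  WITH DCPROPERTIES ('hive.sql.dbcp.username'='hiveuser',
                     'hive.sql.dbcp.password'='secret');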



[hive] branch master updated: HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa)

2022-04-26 Thread kgyrtkirk
This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 59d462ad3e0 HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa)
59d462ad3e0 is described below

commit 59d462ad3e023352ffbd57b4f2446e497a421252
Author: Zoltan Haindrich 
AuthorDate: Tue Apr 26 17:22:49 2022 +0200

HIVE-26135: Invalid Anti join conversion may cause missing results (#3205) (Zoltan Haindrich reviewed by Krisztian Kasa)
---
 .../hive/ql/optimizer/calcite/HiveCalciteUtil.java |  18 +-
 .../calcite/rules/HiveAntiSemiJoinRule.java        |  23 +-
 .../queries/clientpositive/antijoin_conversion.q   |  22 ++
 .../clientpositive/llap/antijoin_conversion.q.out  | 280 +
 4 files changed, 339 insertions(+), 4 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index d925f159fba..160bfb86f6c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -630,7 +630,7 @@ public class HiveCalciteUtil {
   }
 };
 
-  public static ImmutableList<RexNode> getPredsNotPushedAlready(RelNode inp, List<RexNode> predsToPushDown) {   
+  public static ImmutableList<RexNode> getPredsNotPushedAlready(RelNode inp, List<RexNode> predsToPushDown) {
     return getPredsNotPushedAlready(Sets.newHashSet(), inp, predsToPushDown);
   }
 
@@ -1238,6 +1238,22 @@ public class HiveCalciteUtil {
     return false;
   }
 
+  public static boolean hasAllExpressionsFromRightSide(RelNode joinRel, List<RexNode> expressions) {
+    List<RelDataTypeField> joinFields = joinRel.getRowType().getFieldList();
+    int nTotalFields = joinFields.size();
+    List<RelDataTypeField> leftFields = (joinRel.getInputs().get(0)).getRowType().getFieldList();
+    int nFieldsLeft = leftFields.size();
+    ImmutableBitSet rightBitmap = ImmutableBitSet.range(nFieldsLeft, nTotalFields);
+
+    for (RexNode node : expressions) {
+      ImmutableBitSet inputBits = RelOptUtil.InputFinder.bits(node);
+      if (!rightBitmap.contains(inputBits)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   /**
    * Extracts inputs referenced by aggregate operator.
    */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
index 14a64c3d75c..3697ec2c4aa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAntiSemiJoinRule.java
@@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.plan.Strong;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.Join;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
@@ -36,8 +39,7 @@ import org.slf4j.LoggerFactory;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 /**
  * Planner rule that converts a join plus filter to anti join.
@@ -136,7 +138,8 @@ public class HiveAntiSemiJoinRule extends RelOptRule {
     for (RexNode filterNode : aboveFilters) {
       if (filterNode.getKind() == SqlKind.IS_NULL) {
         // Null filter from right side table can be removed and its a pre-condition for anti join conversion.
-        if (HiveCalciteUtil.hasAnyExpressionFromRightSide(join, Collections.singletonList(filterNode))) {
+        if (HiveCalciteUtil.hasAllExpressionsFromRightSide(join, Collections.singletonList(filterNode))
+            && isStrong(((RexCall) filterNode).getOperands().get(0))) {
           hasNullFilterOnRightSide = true;
         } else {
           filterList.add(filterNode);
@@ -157,4 +160,18 @@ public class HiveAntiSemiJoinRule extends RelOptRule {
     }
     return filterList;
   }
+
+  private boolean isStrong(RexNode rexNode) {
+    AtomicBoolean hasCast = new 
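
The rule rewrites a left outer join whose result is filtered with IS NULL on the right side into an anti join. The fix tightens the precondition: the IS NULL operand must reference only right-side columns, and it must be strong (null only when its inputs are null, with no intervening cast); previously it was enough for the filter to touch any right-side column. A hedged SQL illustration of the distinction (table and column names are illustrative):

  -- Safe to convert: b.id can only be NULL when no b row matched.
  SELECT a.id FROM a LEFT JOIN b ON a.id = b.id WHERE b.id IS NULL;

  -- Not safe: a.x + b.y can be NULL even for matched rows (e.g. when a.x
  -- is NULL), so converting to an anti join would drop rows the original
  -- filter keeps; those are the "missing results" in the JIRA title.
  SELECT a.id FROM a LEFT JOIN b ON a.id = b.id WHERE a.x + b.y IS NULL;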

[hive] branch master updated: HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)

2022-04-26 Thread lpinter
This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new dbdcf00dd63 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
dbdcf00dd63 is described below

commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e
Author: László Pintér <4102+lcspin...@users.noreply.github.com>
AuthorDate: Tue Apr 26 16:06:55 2022 +0200

HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 18 ++
 .../hive/HiveIcebergStorageHandlerWithEngineBase.java  |  3 +--
 .../apache/iceberg/mr/hive/TestHiveIcebergSelects.java |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index f23b0d16c10..6fdddb9b343 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -78,6 +78,7 @@ import org.apache.hadoop.mapred.JobID;
 import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.mapred.OutputCommitter;
 import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.PartitionSpecParser;
 import org.apache.iceberg.Schema;
@@ -167,14 +168,14 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
   }
 
   @Override
   public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
     // For Tez, setting the committer here is enough to make sure it'll be part of the jobConf
     map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName());
     // For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there
@@ -744,8 +745,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return column;
   }
 
-  private void fallbackToNonVectorizedModeForV2(Properties tableProps) {
-    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) {
+  /**
+   * If any of the following checks is true we fall back to non vectorized mode:
+   * <ul>
+   *   <li>iceberg format-version is "2"</li>
+   *   <li>fileformat is set to avro</li>
+   * </ul>
+   * @param tableProps table properties, must be not null
+   */
+  private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
+    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
+        FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
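
In practice, vectorized reads are now switched off automatically for Avro-backed Iceberg tables as well as for format-version 2 tables. A hedged illustration of tables that would trigger the fallback (property keys per Iceberg's TableProperties; schemas are illustrative):

  -- Avro default file format: read in non-vectorized mode after this change.
  CREATE TABLE ice_avro (id int, name string)
  STORED BY ICEBERG
  TBLPROPERTIES ('write.format.default'='avro');

  -- Iceberg v2 table: already fell back before this change.
  CREATE TABLE ice_v2 (id int, name string)
  STORED BY ICEBERG
  TBLPROPERTIES ('format-version'='2');
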
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
index 95f03cdade0..6de80dfd32e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
@@ -112,8 +112,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase {
     if (javaVersion.equals("1.8")) {
       testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false});
       // test for vectorization=ON in case of ORC and PARQUET format with Tez engine
-      if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) &&
-          "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
+      if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && 

[hive] branch master updated: HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238)

2022-04-26 Thread pvary
This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
 new 3c5613fa88f HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238)
3c5613fa88f is described below

commit 3c5613fa88f35f81df944b241d95a6f78ef71d7d
Author: Butao Zhang <9760681+zhangbu...@users.noreply.github.com>
AuthorDate: Tue Apr 26 16:42:09 2022 +0800

HIVE-26171: HMSHandler get_all_tables method can not retrieve tables from remote database (Butao Zhang reviewed by Peter Vary) (#3238)
---
 .../src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
index 1f8365e3140..32ed701b03b 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
@@ -6234,6 +6234,13 @@ public class HMSHandler extends FacebookBase implements IHMSHandler {
     List<String> ret = null;
     Exception ex = null;
     String[] parsedDbName = parseDbName(dbname, conf);
+    try {
+      if (isDatabaseRemote(dbname)) {
+        Database db = get_database_core(parsedDbName[CAT_NAME], parsedDbName[DB_NAME]);
+        return DataConnectorProviderFactory.getDataConnectorProvider(db).getTableNames();
+      }
+    } catch (Exception e) { /* ignore */ }
+
     try {
       ret = getMS().getAllTables(parsedDbName[CAT_NAME], parsedDbName[DB_NAME]);
       ret = FilterUtils.filterTableNamesIfEnabled(isServerFilterEnabled, filterHook,
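
With this fix, get_all_tables on a REMOTE database is delegated to the database's data connector provider instead of the local metastore, which holds no table metadata for such databases and so returned an empty list. A hedged sketch of the user-visible flow (connector type, URL, credentials, and database names are illustrative):

  CREATE CONNECTOR mysql_conn
  TYPE 'mysql'
  URL 'jdbc:mysql://localhost:3306'
  WITH DCPROPERTIES ('hive.sql.dbcp.username'='hiveuser',
                     'hive.sql.dbcp.password'='secret');

  CREATE REMOTE DATABASE remote_db USING mysql_conn
  WITH DBPROPERTIES ('connector.remoteDbName'='testdb');

  -- Previously returned nothing; now lists the tables of testdb
  -- through MySQLConnectorProvider.getTableNames().
  SHOW TABLES IN remote_db;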