This is an automated email from the ASF dual-hosted git repository. mbod pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 3f2067f HIVE-25581: Iceberg storage handler should set common projection pruning config (Marton Bod, reviewed by Peter Vary and Adam Szita) 3f2067f is described below commit 3f2067f02c86460030948d930379c168c40eca87 Author: Marton Bod <marton....@gmail.com> AuthorDate: Sat Oct 2 19:29:44 2021 +0200 HIVE-25581: Iceberg storage handler should set common projection pruning config (Marton Bod, reviewed by Peter Vary and Adam Szita) Iceberg queries always need "tez.mrreader.config.update.properties" to be set in order for projection pruning to work. Currently it's only set as part of the TestHiveShell setup for unit tests. We should ensure it's set by the Iceberg storage handler by default for all Iceberg queries. --- data/conf/iceberg/llap/tez-site.xml | 4 ---- data/conf/iceberg/tez/tez-site.xml | 4 ---- .../java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 5 +++++ .../src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java | 3 --- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/data/conf/iceberg/llap/tez-site.xml b/data/conf/iceberg/llap/tez-site.xml index 779886f..7ad5ad4 100644 --- a/data/conf/iceberg/llap/tez-site.xml +++ b/data/conf/iceberg/llap/tez-site.xml @@ -8,10 +8,6 @@ <value>org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled</value> </property> <property> - <name>tez.mrreader.config.update.properties</name> - <value>hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids</value> - </property> - <property> <name>tez.am.resource.memory.mb</name> <value>256</value> </property> diff --git a/data/conf/iceberg/tez/tez-site.xml b/data/conf/iceberg/tez/tez-site.xml index 779886f..7ad5ad4 100644 --- a/data/conf/iceberg/tez/tez-site.xml +++ b/data/conf/iceberg/tez/tez-site.xml @@ -8,10 +8,6 @@ <value>org.apache.tez.dag.app.dag.impl.DAGSchedulerNaturalOrderControlled</value> </property> <property> - <name>tez.mrreader.config.update.properties</name> - <value>hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids</value> - </property> - <property> <name>tez.am.resource.memory.mb</name> <value>256</value> </property> diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java index 5df64bf..7a357af 100644 --- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java +++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java @@ -164,6 +164,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H @Override public void configureJobConf(TableDesc tableDesc, JobConf jobConf) { + setCommonJobConf(jobConf); if (tableDesc != null && tableDesc.getProperties() != null && tableDesc.getProperties().get(WRITE_KEY) != null) { String tableName = tableDesc.getTableName(); @@ -359,6 +360,10 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H return IcebergMetadataTables.isValidMetaTable(metaTableName); } + private void setCommonJobConf(JobConf jobConf) { + jobConf.set("tez.mrreader.config.update.properties", "hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids"); + } + public boolean addDynamicSplitPruningEdge(org.apache.hadoop.hive.ql.metadata.Table table, ExprNodeDesc syntheticFilterPredicate) { try { diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java index 3d39889..8538cb1 100644 --- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java +++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java @@ -214,9 +214,6 @@ public class TestHiveShell { // set to true so that the Tez session will create an empty jar for localization hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEST_IDE, true); - // enables vectorization on Tez - hiveConf.set("tez.mrreader.config.update.properties", "hive.io.file.readcolumn.names,hive.io.file.readcolumn.ids"); - // set lifecycle hooks hiveConf.setVar(HiveConf.ConfVars.HIVE_QUERY_LIFETIME_HOOKS, HiveIcebergQueryLifeTimeHook.class.getName());