This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new dbdcf00dd63 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
dbdcf00dd63 is described below

commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e
Author: László Pintér <47777102+lcspin...@users.noreply.github.com>
AuthorDate: Tue Apr 26 16:06:55 2022 +0200

    HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java     | 18 ++++++++++++++----
 .../hive/HiveIcebergStorageHandlerWithEngineBase.java  |  3 +--
 .../apache/iceberg/mr/hive/TestHiveIcebergSelects.java |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index f23b0d16c10..6fdddb9b343 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -78,6 +78,7 @@ import org.apache.hadoop.mapred.JobID;
 import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.mapred.OutputCommitter;
 import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.PartitionSpecParser;
 import org.apache.iceberg.Schema;
@@ -167,14 +168,14 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
   }
 
   @Override
   public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
     // For Tez, setting the committer here is enough to make sure it'll be part of the jobConf
     map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName());
     // For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there
@@ -744,8 +745,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return column;
   }
 
-  private void fallbackToNonVectorizedModeForV2(Properties tableProps) {
-    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) {
+  /**
+   * If any of the following checks is true we fall back to non vectorized mode:
+   * <ul>
+   *   <li>iceberg format-version is "2"</li>
+   *   <li>fileformat is set to avro</li>
+   * </ul>
+   * @param tableProps table properties, must be not null
+   */
+  private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
+    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
+        FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
index 95f03cdade0..6de80dfd32e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
@@ -112,8 +112,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase {
         if (javaVersion.equals("1.8")) {
           testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false});
           // test for vectorization=ON in case of ORC and PARQUET format with Tez engine
-          if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) &&
-              "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
+          if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
             testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true});
           }
         }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
index 29051d0f5b0..ff54a9b0e2a 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
@@ -63,7 +63,6 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
   @Test
   public void testCBOWithSelectedColumnsNonOverlapJoin() throws IOException {
     shell.setHiveSessionValue("hive.cbo.enable", true);
-
     testTables.createTable(shell, "products", PRODUCT_SCHEMA, fileFormat, PRODUCT_RECORDS);
     testTables.createTable(shell, "orders", ORDER_SCHEMA, fileFormat, ORDER_RECORDS);
 
@@ -190,6 +189,7 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
 
   @Test
   public void testScanTableCaseInsensitive() throws IOException {
+    shell.setHiveSessionValue(InputFormatConfig.CASE_SENSITIVE, false);
     testTables.createTable(shell, "customers",
         HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA_WITH_UPPERCASE, fileFormat,
         HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);