This is an automated email from the ASF dual-hosted git repository.

kuczoram pushed a commit to branch revert-2419-HIVE-25276
in repository https://gitbox.apache.org/repos/asf/hive.git
commit b7c809d79146e4fa09a88f23be7b928bde48ea95
Author: kuczoram <kuczo...@gmail.com>
AuthorDate: Tue Jul 20 22:04:06 2021 +0200

    Revert "HIVE-25276: Enable automatic statistics generation for Iceberg tables (Peter Vary reviewed by Marton Bod and Adam Szita)"

    This reverts commit 76c49b9df957c8c05b81a4016282c03648b728b9.
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 33 +++-----
 .../TestHiveIcebergStorageHandlerWithEngine.java   | 97 ----------------------
 .../org/apache/iceberg/mr/hive/TestHiveShell.java  |  9 +-
 .../results/positive/vectorized_iceberg_read.q.out | 10 +--
 .../hive/ql/ddl/table/create/CreateTableDesc.java  | 30 ++-----
 .../apache/hadoop/hive/ql/exec/tez/DagUtils.java   |  5 +-
 .../hadoop/hive/ql/parse/PartitionTransform.java   | 32 ++-----
 .../hive/ql/parse/PartitionTransformSpec.java      |  9 --
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  3 +-
 .../hive/ql/exec/tez/TestTezOutputCommitter.java   |  1 -
 10 files changed, 39 insertions(+), 190 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 4a4271b..648b688 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -39,14 +39,10 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.parse.PartitionTransform;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
-import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.BaseTable;
@@ -143,7 +139,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
 
     // Iceberg schema and specification generated by the code
     Schema schema = schema(catalogProperties, hmsTable);
-    PartitionSpec spec = spec(conf, schema, hmsTable);
+    PartitionSpec spec = spec(conf, schema, catalogProperties, hmsTable);
 
     // If there are partition keys specified remove them from the HMS table and add them to the column list
     if (hmsTable.isSetPartitionKeys()) {
@@ -241,21 +237,15 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       preAlterTableProperties.tableLocation = sd.getLocation();
       preAlterTableProperties.format = sd.getInputFormat();
       preAlterTableProperties.schema = schema(catalogProperties, hmsTable);
+      preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, catalogProperties, hmsTable);
       preAlterTableProperties.partitionKeys = hmsTable.getPartitionKeys();
       context.getProperties().put(HiveMetaHook.ALLOW_PARTITION_KEY_CHANGE, "true");
       // If there are partition keys specified remove them from the HMS table and add them to the column list
-      if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
-        List<PartitionTransformSpec> spec =
-            PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
-        if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
-          throw new MetaException("Query state attached to Session state must be not null. " +
-              "Partition transform metadata cannot be saved.");
-        }
+      if (hmsTable.isSetPartitionKeys()) {
         hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
         hmsTable.setPartitionKeysIsSet(false);
       }
-      preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, hmsTable);
-
       sd.setInputFormat(HiveIcebergInputFormat.class.getCanonicalName());
       sd.setOutputFormat(HiveIcebergOutputFormat.class.getCanonicalName());
       sd.setSerdeInfo(new SerDeInfo("icebergSerde", HiveIcebergSerDe.class.getCanonicalName(),
@@ -297,6 +287,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       HiveTableUtil.importFiles(preAlterTableProperties.tableLocation, preAlterTableProperties.format,
           partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
     } else if (currentAlterTableOp != null) {
+      Map<String, String> contextProperties = context.getProperties();
       switch (currentAlterTableOp) {
         case REPLACE_COLUMNS:
         case RENAME_COLUMN:
@@ -307,7 +298,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
           break;
         case ADDPROPS:
         case DROPPROPS:
-          alterTableProperties(hmsTable, context.getProperties());
+          alterTableProperties(hmsTable, contextProperties);
           break;
         case SETPARTITIONSPEC:
           IcebergTableUtil.updateSpec(conf, icebergTable);
@@ -469,23 +460,25 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     }
   }
 
-  private static PartitionSpec spec(Configuration configuration, Schema schema,
+  private static PartitionSpec spec(Configuration configuration, Schema schema, Properties properties,
       org.apache.hadoop.hive.metastore.api.Table hmsTable) {
-    Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
-        "We can only handle non-partitioned Hive tables. The Iceberg schema should be in " +
-            InputFormatConfig.PARTITION_SPEC + " or already converted to a partition transform ");
-
     PartitionSpec spec = IcebergTableUtil.spec(configuration, schema);
     if (spec != null) {
-      Preconditions.checkArgument(hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC) == null,
+      Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
           "Provide only one of the following: Hive partition transform specification, or the " +
               InputFormatConfig.PARTITION_SPEC + " property");
       return spec;
     }
 
     if (hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC) != null) {
+      Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
+          "Provide only one of the following: Hive partition specification, or the " +
+              InputFormatConfig.PARTITION_SPEC + " property");
       return PartitionSpecParser.fromJson(schema, hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC));
+    } else if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
+      // If the table is partitioned then generate the identity partition definitions for the Iceberg table
+      return HiveSchemaUtil.spec(schema, hmsTable.getPartitionKeys());
     } else {
       return PartitionSpec.unpartitioned();
     }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
index 37ca700..8585fb6 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerWithEngine.java
@@ -1875,103 +1875,6 @@ public class TestHiveIcebergStorageHandlerWithEngine {
     HiveIcebergTestUtils.validateData(expectedResults, HiveIcebergTestUtils.valueForRow(schemaForResultSet, rows), 0);
   }
 
-  @Test
-  public void testStatWithInsert() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
-        PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
-
-    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
-      // If the location is set and we have to gather stats, then we have to update the table stats now
-      shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
-    }
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
-    shell.executeStatement(insert);
-
-    checkColStat(identifier.name(), "customer_id");
-  }
-
-  @Test
-  public void testStatWithInsertOverwrite() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
-        PartitionSpec.unpartitioned(), fileFormat, ImmutableList.of());
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, true);
-    shell.executeStatement(insert);
-
-    checkColStat(identifier.name(), "customer_id");
-  }
-
-  @Test
-  public void testStatWithPartitionedInsert() {
-    TableIdentifier identifier = TableIdentifier.of("default", "customers");
-    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
-        .identity("last_name").build();
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
-        fileFormat, ImmutableList.of());
-
-    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
-      // If the location is set and we have to gather stats, then we have to update the table stats now
-      shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
-    }
-
-    String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
-    shell.executeStatement(insert);
-
-    checkColStat("customers", "customer_id");
-    checkColStat("customers", "first_name");
-  }
-
-  @Test
-  public void testStatWithCTAS() {
-    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
-
-    shell.executeStatement("CREATE TABLE source (id bigint, name string) PARTITIONED BY (dept string) STORED AS ORC");
-    shell.executeStatement(testTables.getInsertQuery(
-        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, TableIdentifier.of("default", "source"), false));
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    shell.executeStatement(String.format(
-        "CREATE TABLE target STORED BY ICEBERG %s TBLPROPERTIES ('%s'='%s') AS SELECT * FROM source",
-        testTables.locationForCreateTableSQL(TableIdentifier.of("default", "target")),
-        TableProperties.DEFAULT_FILE_FORMAT, fileFormat));
-
-    checkColStat("target", "id");
-  }
-
-  @Test
-  public void testStatWithPartitionedCTAS() {
-    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
-
-    shell.executeStatement("CREATE TABLE source (id bigint, name string) PARTITIONED BY (dept string) STORED AS ORC");
-    shell.executeStatement(testTables.getInsertQuery(
-        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, TableIdentifier.of("default", "source"), false));
-
-    shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
-    shell.executeStatement(String.format(
-        "CREATE TABLE target PARTITIONED BY (dept, name) " +
-        "STORED BY ICEBERG TBLPROPERTIES ('%s'='%s') AS SELECT * FROM source s",
-        TableProperties.DEFAULT_FILE_FORMAT, fileFormat));
-
-    checkColStat("target", "id");
-    checkColStat("target", "dept");
-  }
-
-  private void checkColStat(String tableName, String colName) {
-    List<Object[]> rows = shell.executeStatement("DESCRIBE " + tableName + " " + colName);
-
-    Assert.assertEquals(2, rows.size());
-    Assert.assertEquals(StatsSetupConst.COLUMN_STATS_ACCURATE, rows.get(1)[0]);
-  }
-
   private void testComplexTypeWrite(Schema schema, List<Record> records) throws IOException {
     String tableName = "complex_table";
     Table table = testTables.createTable(shell, "complex_table", schema, fileFormat, ImmutableList.of());
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
index b3c9440..15b40cf 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveShell.java
@@ -36,7 +36,6 @@ import org.apache.hive.service.cli.SessionHandle;
 import org.apache.hive.service.cli.session.HiveSession;
 import org.apache.hive.service.server.HiveServer2;
 import org.apache.iceberg.hive.TestHiveMetastore;
-import org.apache.iceberg.relocated.com.google.common.base.Joiner;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 
 /**
@@ -158,13 +157,13 @@ public class TestHiveShell {
 
   /**
    * Used for debugging. Please do not remove even if unused in the codebase.
-   * @param statement The statement to execute
-   * @return The formatted statement output in a single String which is IDE friendly for viewing
+   * @param statement EXPLAIN statement
+   * @return EXPLAIN statement output in a single String which is IDE friendly for viewing
    */
-  public String executeAndStringify(String statement) {
+  public String executeExplain(String statement) {
     List<Object[]> objects = executeStatement(statement);
     return objects.stream()
-        .map(o -> Joiner.on("\t").useForNull("NULL").join(o))
+        .map(o -> (String) o[0])
         .collect(Collectors.joining("\n"));
   }
 
diff --git a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
index dfb7b2c..d22042b 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read.q.out
@@ -129,17 +129,17 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_11]
-        Select Operator [SEL_10] (rows=1 width=372)
+        Select Operator [SEL_10] (rows=1 width=564)
           Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_9] (rows=1 width=372)
+          Group By Operator [GBY_9] (rows=1 width=564)
            Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-              Group By Operator [GBY_7] (rows=1 width=372)
+              Group By Operator [GBY_7] (rows=1 width=564)
                Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
-                TableScan [TS_0] (rows=1 width=372)
-                  default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+                TableScan [TS_0] (rows=1 width=564)
+                  default@tbl_ice_orc_all_types,tbl_ice_orc_all_types,Tbl:COMPLETE,Col:NONE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
 
 PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_orc_all_types
  group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index 595dbab..be9cffa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -26,7 +26,6 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Optional;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.Path;
@@ -45,7 +44,6 @@ import org.apache.hadoop.hive.metastore.api.SQLForeignKey;
 import org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint;
 import org.apache.hadoop.hive.metastore.api.SQLPrimaryKey;
 import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -58,8 +56,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PartitionTransform;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
 import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.Explain;
@@ -67,7 +63,6 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ValidationUtility;
 import org.apache.hadoop.hive.ql.plan.Explain.Level;
-import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -745,6 +740,10 @@ public class CreateTableDesc implements DDLDesc, Serializable {
       tbl.getTTable().getParameters().putAll(getTblProps());
     }
 
+    if (getPartCols() != null) {
+      tbl.setPartCols(getPartCols());
+    }
+
     if (getNumBuckets() != -1) {
       tbl.setNumBuckets(getNumBuckets());
     }
@@ -805,26 +804,9 @@ public class CreateTableDesc implements DDLDesc, Serializable {
       }
     }
 
-    Optional<List<FieldSchema>> cols = Optional.ofNullable(getCols());
-    Optional<List<FieldSchema>> partCols = Optional.ofNullable(getPartCols());
-
-    if (storageHandler != null && storageHandler.alwaysUnpartitioned()) {
-      tbl.getSd().setCols(new ArrayList<>());
-      cols.ifPresent(c -> tbl.getSd().getCols().addAll(c));
-      if (partCols.isPresent() && !partCols.get().isEmpty()) {
-        // Add the partition columns to the normal columns and save the transform to the session state
-        tbl.getSd().getCols().addAll(partCols.get());
-        List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(partCols.get());
-        if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
-          throw new HiveException("Query state attached to Session state must be not null. " +
-              "Partition transform metadata cannot be saved.");
-        }
-      }
-    } else {
-      cols.ifPresent(c -> tbl.setFields(c));
-      partCols.ifPresent(c -> tbl.setPartCols(c));
+    if (getCols() != null) {
+      tbl.setFields(getCols());
     }
-
     if (getBucketCols() != null) {
       tbl.setBucketCols(getBucketCols());
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
index f6b0bba..6398cf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/DagUtils.java
@@ -1615,12 +1615,9 @@ public class DagUtils {
     } else {
       outputKlass = MROutput.class;
     }
-
-    // If there is a fileSink add a DataSink to the vertex
-    boolean hasFileSink = workUnit.getAllOperators().stream().anyMatch(o -> o instanceof FileSinkOperator);
     // final vertices need to have at least one output
     boolean endVertex = tezWork.getLeaves().contains(workUnit);
-    if (endVertex || hasFileSink) {
+    if (endVertex) {
       OutputCommitterDescriptor ocd = null;
       String committer = HiveConf.getVar(conf, ConfVars.TEZ_MAPREDUCE_OUTPUT_COMMITTER);
       if (committer != null && !committer.isEmpty()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
index 8013ca0..117087a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransform.java
@@ -17,9 +17,6 @@
  */
 package org.apache.hadoop.hive.ql.parse;
 
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec.TransformType;
-
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -29,26 +26,15 @@ import java.util.stream.Stream;
 
 public class PartitionTransform {
 
-  private static final Map<Integer, TransformType> TRANSFORMS = Stream
-      .of(new Object[][] { { HiveParser.TOK_IDENTITY, TransformType.IDENTITY },
-          { HiveParser.TOK_YEAR, TransformType.YEAR },
-          { HiveParser.TOK_MONTH, TransformType.MONTH },
-          { HiveParser.TOK_DAY, TransformType.DAY },
-          { HiveParser.TOK_HOUR, TransformType.HOUR },
-          { HiveParser.TOK_TRUNCATE, TransformType.TRUNCATE },
-          { HiveParser.TOK_BUCKET, TransformType.BUCKET } })
-      .collect(Collectors.toMap(e -> (Integer) e[0], e -> (TransformType) e[1]));
-
-  /**
-   * Get the identity transform specification based on the partition columns
-   * @param fields The partition column fields
-   * @return list of partition transforms
-   */
-  public static List<PartitionTransformSpec> getPartitionTransformSpec(List<FieldSchema> fields) {
-    return fields.stream()
-        .map(field -> new PartitionTransformSpec(field.getName(), TransformType.IDENTITY, Optional.empty()))
-        .collect(Collectors.toList());
-  }
+  private static final Map<Integer, PartitionTransformSpec.TransformType> TRANSFORMS = Stream
+      .of(new Object[][] { { HiveParser.TOK_IDENTITY, PartitionTransformSpec.TransformType.IDENTITY },
+          { HiveParser.TOK_YEAR, PartitionTransformSpec.TransformType.YEAR },
+          { HiveParser.TOK_MONTH, PartitionTransformSpec.TransformType.MONTH },
+          { HiveParser.TOK_DAY, PartitionTransformSpec.TransformType.DAY },
+          { HiveParser.TOK_HOUR, PartitionTransformSpec.TransformType.HOUR },
+          { HiveParser.TOK_TRUNCATE, PartitionTransformSpec.TransformType.TRUNCATE },
+          { HiveParser.TOK_BUCKET, PartitionTransformSpec.TransformType.BUCKET } })
+      .collect(Collectors.toMap(e -> (Integer) e[0], e -> (PartitionTransformSpec.TransformType) e[1]));
 
   /**
    * Parse the partition transform specifications from the AST Tree node.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
index 108a006..f97752c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
@@ -29,15 +29,6 @@ public class PartitionTransformSpec {
   private TransformType transformType;
   private Optional<Integer> transformParam;
 
-  public PartitionTransformSpec() {
-  }
-
-  public PartitionTransformSpec(String columnName, TransformType transformType, Optional<Integer> transformParam) {
-    this.columnName = columnName;
-    this.transformType = transformType;
-    this.transformParam = transformParam;
-  }
-
   public String getColumnName() {
     return columnName;
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d9b5f81..c17fa4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7868,8 +7868,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // and it is an insert overwrite or insert into table
     if (conf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
         && conf.getBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER)
-        && destinationTable != null
-        && (!destinationTable.isNonNative() || destinationTable.getStorageHandler().commitInMoveTask())
+        && destinationTable != null && !destinationTable.isNonNative()
         && !destTableIsTemporary && !destTableIsMaterialization
         && ColumnStatsAutoGatherContext.canRunAutogatherStats(fso)) {
       if (destType == QBMetaData.DEST_TABLE) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
index 01df5bc..5bed3d5 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/tez/TestTezOutputCommitter.java
@@ -122,7 +122,6 @@ public class TestTezOutputCommitter {
     conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
         "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
     conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
-    conf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
 
     conf.setInt("tez.am.task.max.failed.attempts", MAX_TASK_ATTEMPTS);
    conf.set("mapred.output.committer.class", committerClass);
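
For anyone skimming the revert: its behavioural core is the restored fallback in HiveIcebergMetaHook.spec(), which maps each HMS partition key to an Iceberg identity transform instead of stashing a PartitionTransformSpec in the session state. Below is a minimal sketch of that fallback using Iceberg's public PartitionSpec builder; the class and helper name are illustrative only (the committed code delegates to HiveSchemaUtil.spec()).

    import java.util.List;

    import org.apache.iceberg.PartitionSpec;
    import org.apache.iceberg.Schema;

    // Hypothetical sketch, not the committed implementation.
    public class IdentitySpecSketch {
      // Build an identity PartitionSpec from HMS partition key names:
      // each partition column partitions the Iceberg table by its own value.
      static PartitionSpec identitySpec(Schema schema, List<String> partitionKeyNames) {
        if (partitionKeyNames == null || partitionKeyNames.isEmpty()) {
          return PartitionSpec.unpartitioned();
        }
        PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
        for (String name : partitionKeyNames) {
          builder = builder.identity(name);
        }
        return builder.build();
      }
    }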