Repository: hive
Updated Branches:
  refs/heads/branch-2.2 e8e3974d1 -> 5949479f7
Revert "HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)" This reverts commit e8e3974d15aaaa7a550ec0112258b59b603d9829. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/220d1998 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/220d1998 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/220d1998 Branch: refs/heads/branch-2.2 Commit: 220d199859e373f99e314e50121a149c10351a67 Parents: e8e3974 Author: Aihua Xu <aihu...@apache.org> Authored: Mon Mar 12 14:52:36 2018 -0700 Committer: Aihua Xu <aihu...@apache.org> Committed: Mon Mar 12 14:52:36 2018 -0700 ---------------------------------------------------------------------- .../TablePropertyEnrichmentOptimizer.java | 45 +--- .../avro_tableproperty_optimize.q | 63 ------ .../avro_tableproperty_optimize.q.out | 226 ------------------- 3 files changed, 10 insertions(+), 324 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/220d1998/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java index d313f7d..5824490 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java @@ -26,7 +26,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -41,10 +40,8 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.Deserializer; -import org.apache.hive.common.util.ReflectionUtil; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -81,51 +78,29 @@ class TablePropertyEnrichmentOptimizer extends Transform { } } - /** - * Retrieves the table properties as well as the properties from Serde. - */ - private static Map<String, String> getTableParameters(Table table) { - Map<String, String> originalTableParameters = new HashMap<>(table.getParameters()); - Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); - for (String property : tableMetadata.stringPropertyNames()) { - if (!originalTableParameters.containsKey(property)) { - originalTableParameters.put(property, tableMetadata.getProperty(property)); - } - } - return originalTableParameters; - } - private static class Processor implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { TableScanOperator tsOp = (TableScanOperator) nd; WalkerCtx context = (WalkerCtx)procCtx; + TableScanDesc tableScanDesc = tsOp.getConf(); Table table = tsOp.getConf().getTableMetadata().getTTable(); + Map<String, String> tableParameters = table.getParameters(); + Properties tableProperties = new Properties(); + tableProperties.putAll(tableParameters); - Map<String, String> originalTableParameters = getTableParameters(table); - if (LOG.isDebugEnabled()) { - LOG.debug("Original Table parameters: " + originalTableParameters); - } - Properties clonedTableParameters = new Properties(); - clonedTableParameters.putAll(originalTableParameters); - - String deserializerClassName = null; + Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer(); + String deserializerClassName = deserializer.getClass().getName(); try { - deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib(); - Deserializer deserializer = ReflectionUtil.newInstance( - context.conf.getClassByName(deserializerClassName) - .asSubclass(Deserializer.class), - context.conf); - if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) { - deserializer.initialize(context.conf, clonedTableParameters); + deserializer.initialize(context.conf, tableProperties); LOG.debug("SerDe init succeeded for class: " + deserializerClassName); - for (Map.Entry property : clonedTableParameters.entrySet()) { - if (!property.getValue().equals(originalTableParameters.get(property.getKey()))) { + for (Map.Entry property : tableProperties.entrySet()) { + if (!property.getValue().equals(tableParameters.get(property.getKey()))) { LOG.debug("Resolving changed parameters! 
key=" + property.getKey() + ", value=" + property.getValue()); - table.getParameters().put((String) property.getKey(), (String) property.getValue()); + tableParameters.put((String) property.getKey(), (String) property.getValue()); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/220d1998/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q deleted file mode 100644 index e6b75c6..0000000 --- a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q +++ /dev/null @@ -1,63 +0,0 @@ --- Check the queries work fine with the following property set to true -SET hive.optimize.update.table.properties.from.serde=true; - -dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/; - -CREATE TABLE avro_extschema_literal -STORED AS AVRO -TBLPROPERTIES ('avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - "fields": [ - { "name":"col1", "type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", "type":"string" } - ] }'); -INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2'); - -DESCRIBE EXTENDED avro_extschema_literal; -SELECT * FROM avro_extschema_literal; - -CREATE TABLE avro_extschema_url -STORED AS AVRO -TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc'); -INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2'); - -DESCRIBE EXTENDED avro_extschema_url; -SELECT * FROM avro_extschema_url; - -CREATE TABLE avro_extschema_literal1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -WITH SERDEPROPERTIES ( -'avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - "fields": [ - { "name":"col1", "type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", 
"type":"string" } - ] }') -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; -INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2'); - -DESCRIBE EXTENDED avro_extschema_literal1; -SELECT * FROM avro_extschema_literal1; - -CREATE TABLE avro_extschema_url1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -WITH SERDEPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc') -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; -INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2'); - -DESCRIBE EXTENDED avro_extschema_url1; -SELECT * FROM avro_extschema_url1; http://git-wip-us.apache.org/repos/asf/hive/blob/220d1998/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out deleted file mode 100644 index 8660c44..0000000 --- a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out +++ /dev/null @@ -1,226 +0,0 @@ -PREHOOK: query: CREATE TABLE avro_extschema_literal -STORED AS AVRO -TBLPROPERTIES ('avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - "fields": [ - { "name":"col1", "type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", "type":"string" } - ] }') -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_extschema_literal -POSTHOOK: query: CREATE TABLE avro_extschema_literal -STORED AS AVRO -TBLPROPERTIES ('avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - "fields": [ - { "name":"col1", 
"type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", "type":"string" } - ] }') -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_extschema_literal -PREHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@avro_extschema_literal -POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@avro_extschema_literal -POSTHOOK: Lineage: avro_extschema_literal.col1 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_literal.col2 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_literal.col3 SCRIPT [] -PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_extschema_literal -POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_extschema_literal -col1 string -col2 bigint -col3 string - -#### A masked pattern was here #### -PREHOOK: query: SELECT * FROM avro_extschema_literal -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_extschema_literal -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_extschema_literal -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_extschema_literal -#### A masked pattern was here #### -s1 1 s2 -PREHOOK: query: CREATE TABLE avro_extschema_url -STORED AS AVRO -#### A masked pattern was here #### -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_extschema_url -POSTHOOK: query: CREATE TABLE avro_extschema_url -STORED AS AVRO -#### A masked pattern was here #### -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_extschema_url -PREHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2') -PREHOOK: type: QUERY -PREHOOK: 
Input: _dummy_database@_dummy_table -PREHOOK: Output: default@avro_extschema_url -POSTHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@avro_extschema_url -POSTHOOK: Lineage: avro_extschema_url.col1 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_url.col2 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_url.col3 SCRIPT [] -PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_extschema_url -POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_extschema_url -col1 string -col2 bigint -col3 string - -#### A masked pattern was here #### -PREHOOK: query: SELECT * FROM avro_extschema_url -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_extschema_url -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_extschema_url -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_extschema_url -#### A masked pattern was here #### -s1 1 s2 -PREHOOK: query: CREATE TABLE avro_extschema_literal1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -WITH SERDEPROPERTIES ( -'avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - "fields": [ - { "name":"col1", "type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", "type":"string" } - ] }') -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_extschema_literal1 -POSTHOOK: query: CREATE TABLE avro_extschema_literal1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -WITH SERDEPROPERTIES ( -'avro.schema.literal'='{ - "namespace": "org.apache.hive", - "name": "ext_schema", - "type": "record", - 
"fields": [ - { "name":"col1", "type":"string" }, - { "name":"col2", "type":"long" }, - { "name":"col3", "type":"string" } - ] }') -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_extschema_literal1 -PREHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@avro_extschema_literal1 -POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@avro_extschema_literal1 -POSTHOOK: Lineage: avro_extschema_literal1.col1 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_literal1.col2 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_literal1.col3 SCRIPT [] -PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_extschema_literal1 -POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_extschema_literal1 -col1 string -col2 bigint -col3 string - -#### A masked pattern was here #### -PREHOOK: query: SELECT * FROM avro_extschema_literal1 -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_extschema_literal1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_extschema_literal1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_extschema_literal1 -#### A masked pattern was here #### -s1 1 s2 -PREHOOK: query: CREATE TABLE avro_extschema_url1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -#### A masked pattern was here #### -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' 
-PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_extschema_url1 -POSTHOOK: query: CREATE TABLE avro_extschema_url1 -ROW FORMAT SERDE - 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -#### A masked pattern was here #### -STORED AS INPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT - 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_extschema_url1 -PREHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@avro_extschema_url1 -POSTHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@avro_extschema_url1 -POSTHOOK: Lineage: avro_extschema_url1.col1 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_url1.col2 SCRIPT [] -POSTHOOK: Lineage: avro_extschema_url1.col3 SCRIPT [] -PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url1 -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_extschema_url1 -POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url1 -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_extschema_url1 -col1 string -col2 bigint -col3 string - -#### A masked pattern was here #### -PREHOOK: query: SELECT * FROM avro_extschema_url1 -PREHOOK: type: QUERY -PREHOOK: Input: default@avro_extschema_url1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM avro_extschema_url1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@avro_extschema_url1 -#### A masked pattern was here #### -s1 1 s2