Repository: hive Updated Branches: refs/heads/branch-2 cbcd846b7 -> ad5a2fa0e
HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad5a2fa0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad5a2fa0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad5a2fa0 Branch: refs/heads/branch-2 Commit: ad5a2fa0eed89bf88a4287965274c4988e71ee98 Parents: cbcd846 Author: Aihua Xu <aihu...@apache.org> Authored: Thu Mar 8 11:33:37 2018 -0800 Committer: Aihua Xu <aihu...@apache.org> Committed: Mon Mar 12 14:51:43 2018 -0700 ---------------------------------------------------------------------- .../TablePropertyEnrichmentOptimizer.java | 45 +++- .../avro_tableproperty_optimize.q | 63 ++++++ .../avro_tableproperty_optimize.q.out | 226 +++++++++++++++++++ 3 files changed, 324 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ad5a2fa0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java index 5824490..154eb02 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java @@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.Deserializer; +import org.apache.hive.common.util.ReflectionUtil; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform { } } + /** + * Retrieves the table properties as well as the properties from Serde. + */ + private static Map<String, String> getTableParameters(Table table) { + Map<String, String> originalTableParameters = new HashMap<>(table.getParameters()); + Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); + for (String property : tableMetadata.stringPropertyNames()) { + if (!originalTableParameters.containsKey(property)) { + originalTableParameters.put(property, tableMetadata.getProperty(property)); + } + } + return originalTableParameters; + } + private static class Processor implements NodeProcessor { @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsOp = (TableScanOperator) nd; WalkerCtx context = (WalkerCtx)procCtx; - TableScanDesc tableScanDesc = tsOp.getConf(); Table table = tsOp.getConf().getTableMetadata().getTTable(); - Map<String, String> tableParameters = table.getParameters(); - Properties tableProperties = new Properties(); - tableProperties.putAll(tableParameters); - Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer(); - String deserializerClassName = deserializer.getClass().getName(); + Map<String, String> originalTableParameters = getTableParameters(table); + if (LOG.isDebugEnabled()) { + LOG.debug("Original Table parameters: " + originalTableParameters); + } + Properties clonedTableParameters = new Properties(); + clonedTableParameters.putAll(originalTableParameters); + + String deserializerClassName = null; try { + deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib(); + Deserializer deserializer = ReflectionUtil.newInstance( + context.conf.getClassByName(deserializerClassName) + .asSubclass(Deserializer.class), + context.conf); + if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) { - deserializer.initialize(context.conf, tableProperties); + deserializer.initialize(context.conf, clonedTableParameters); LOG.debug("SerDe init succeeded for class: " + deserializerClassName); - for (Map.Entry property : tableProperties.entrySet()) { - if (!property.getValue().equals(tableParameters.get(property.getKey()))) { + for (Map.Entry property : clonedTableParameters.entrySet()) { + if (!property.getValue().equals(originalTableParameters.get(property.getKey()))) { LOG.debug("Resolving changed parameters! key=" + property.getKey() + ", value=" + property.getValue()); - tableParameters.put((String) property.getKey(), (String) property.getValue()); + table.getParameters().put((String) property.getKey(), (String) property.getValue()); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/ad5a2fa0/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q new file mode 100644 index 0000000..e6b75c6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q @@ -0,0 +1,63 @@ +-- Check the queries work fine with the following property set to true +SET hive.optimize.update.table.properties.from.serde=true; + +dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/; + +CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }'); +INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2'); + +DESCRIBE EXTENDED avro_extschema_literal; +SELECT * FROM avro_extschema_literal; + +CREATE TABLE avro_extschema_url +STORED AS AVRO +TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc'); +INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2'); + +DESCRIBE EXTENDED avro_extschema_url; +SELECT * FROM avro_extschema_url; + +CREATE TABLE avro_extschema_literal1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ( +'avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }') +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; +INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2'); + +DESCRIBE EXTENDED avro_extschema_literal1; +SELECT * FROM avro_extschema_literal1; + +CREATE TABLE avro_extschema_url1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc') +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; +INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2'); + +DESCRIBE EXTENDED avro_extschema_url1; +SELECT * FROM avro_extschema_url1; http://git-wip-us.apache.org/repos/asf/hive/blob/ad5a2fa0/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out new file mode 100644 index 0000000..8660c44 --- /dev/null +++ b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: CREATE TABLE avro_extschema_literal +STORED AS AVRO +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_literal +PREHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@avro_extschema_literal +POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@avro_extschema_literal +POSTHOOK: Lineage: avro_extschema_literal.col1 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_literal.col2 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_literal.col3 SCRIPT [] +PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_literal +POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_literal +col1 string +col2 bigint +col3 string + +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM avro_extschema_literal +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_extschema_literal +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_extschema_literal +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_extschema_literal +#### A masked pattern was here #### +s1 1 s2 +PREHOOK: query: CREATE TABLE avro_extschema_url +STORED AS AVRO +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: CREATE TABLE avro_extschema_url +STORED AS AVRO +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_url +PREHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@avro_extschema_url +POSTHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@avro_extschema_url +POSTHOOK: Lineage: avro_extschema_url.col1 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_url.col2 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_url.col3 SCRIPT [] +PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_url +POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_url +col1 string +col2 bigint +col3 string + +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM avro_extschema_url +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_extschema_url +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_extschema_url +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_extschema_url +#### A masked pattern was here #### +s1 1 s2 +PREHOOK: query: CREATE TABLE avro_extschema_literal1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ( +'avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }') +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_literal1 +POSTHOOK: query: CREATE TABLE avro_extschema_literal1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ( +'avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "ext_schema", + "type": "record", + "fields": [ + { "name":"col1", "type":"string" }, + { "name":"col2", "type":"long" }, + { "name":"col3", "type":"string" } + ] }') +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_literal1 +PREHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@avro_extschema_literal1 +POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@avro_extschema_literal1 +POSTHOOK: Lineage: avro_extschema_literal1.col1 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_literal1.col2 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_literal1.col3 SCRIPT [] +PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_literal1 +POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_literal1 +col1 string +col2 bigint +col3 string + +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM avro_extschema_literal1 +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_extschema_literal1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_extschema_literal1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_extschema_literal1 +#### A masked pattern was here #### +s1 1 s2 +PREHOOK: query: CREATE TABLE avro_extschema_url1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +#### A masked pattern was here #### +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_extschema_url1 +POSTHOOK: query: CREATE TABLE avro_extschema_url1 +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +#### A masked pattern was here #### +STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_extschema_url1 +PREHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@avro_extschema_url1 +POSTHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@avro_extschema_url1 +POSTHOOK: Lineage: avro_extschema_url1.col1 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_url1.col2 SCRIPT [] +POSTHOOK: Lineage: avro_extschema_url1.col3 SCRIPT [] +PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_extschema_url1 +POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_extschema_url1 +col1 string +col2 bigint +col3 string + +#### A masked pattern was here #### +PREHOOK: query: SELECT * FROM avro_extschema_url1 +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_extschema_url1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_extschema_url1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_extschema_url1 +#### A masked pattern was here #### +s1 1 s2