Repository: hive
Updated Branches:
  refs/heads/branch-2.2 5fee15988 -> e8e3974d1


HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, 
per table reference. (Addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e8e3974d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e8e3974d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e8e3974d

Branch: refs/heads/branch-2.2
Commit: e8e3974d15aaaa7a550ec0112258b59b603d9829
Parents: 5fee159
Author: Aihua Xu <aihu...@apache.org>
Authored: Thu Mar 8 11:33:37 2018 -0800
Committer: Aihua Xu <aihu...@apache.org>
Committed: Mon Mar 12 14:39:50 2018 -0700

----------------------------------------------------------------------
 .../TablePropertyEnrichmentOptimizer.java       |  45 +++-
 .../avro_tableproperty_optimize.q               |  63 ++++++
 .../avro_tableproperty_optimize.q.out           | 226 +++++++++++++++++++
 3 files changed, 324 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/e8e3974d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
index 5824490..d313f7d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java
@@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hive.common.util.ReflectionUtil;
 
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform {
     }
   }
 
+  /**
+   * Retrieves the table properties as well as the properties from Serde.
+   */
+  private static Map<String, String> getTableParameters(Table table) {
+    Map<String, String> originalTableParameters = new 
HashMap<>(table.getParameters());
+    Properties tableMetadata = MetaStoreUtils.getTableMetadata(table);
+    for (String property : tableMetadata.stringPropertyNames()) {
+      if (!originalTableParameters.containsKey(property)) {
+        originalTableParameters.put(property, 
tableMetadata.getProperty(property));
+      }
+    }
+    return originalTableParameters;
+  }
+
   private static class Processor implements NodeProcessor {
 
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx 
procCtx, Object... nodeOutputs) throws SemanticException {
       TableScanOperator tsOp = (TableScanOperator) nd;
       WalkerCtx context = (WalkerCtx)procCtx;
-
       TableScanDesc tableScanDesc = tsOp.getConf();
       Table table = tsOp.getConf().getTableMetadata().getTTable();
-      Map<String, String> tableParameters = table.getParameters();
-      Properties tableProperties = new Properties();
-      tableProperties.putAll(tableParameters);
 
-      Deserializer deserializer = 
tableScanDesc.getTableMetadata().getDeserializer();
-      String deserializerClassName = deserializer.getClass().getName();
+      Map<String, String> originalTableParameters = getTableParameters(table);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Original Table parameters: " + originalTableParameters);
+      }
+      Properties clonedTableParameters = new Properties();
+      clonedTableParameters.putAll(originalTableParameters);
+
+      String deserializerClassName = null;
       try {
+        deserializerClassName = 
tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib();
+        Deserializer deserializer = ReflectionUtil.newInstance(
+            context.conf.getClassByName(deserializerClassName)
+                .asSubclass(Deserializer.class),
+            context.conf);
+
         if 
(context.serdeClassesUnderConsideration.contains(deserializerClassName)) {
-          deserializer.initialize(context.conf, tableProperties);
+          deserializer.initialize(context.conf, clonedTableParameters);
           LOG.debug("SerDe init succeeded for class: " + 
deserializerClassName);
-          for (Map.Entry property : tableProperties.entrySet()) {
-            if 
(!property.getValue().equals(tableParameters.get(property.getKey()))) {
+          for (Map.Entry property : clonedTableParameters.entrySet()) {
+            if 
(!property.getValue().equals(originalTableParameters.get(property.getKey()))) {
               LOG.debug("Resolving changed parameters! key=" + 
property.getKey() + ", value=" + property.getValue());
-              tableParameters.put((String) property.getKey(), (String) 
property.getValue());
+              table.getParameters().put((String) property.getKey(), (String) 
property.getValue());
             }
           }
         }

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e3974d/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
new file mode 100644
index 0000000..e6b75c6
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/avro_tableproperty_optimize.q
@@ -0,0 +1,63 @@
+-- Check the queries work fine with the following property set to true
+SET hive.optimize.update.table.properties.from.serde=true;
+
+dfs -cp ${system:hive.root}data/files/table1.avsc ${system:test.tmp.dir}/;
+
+CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }');
+INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal;
+SELECT * FROM avro_extschema_literal;
+
+CREATE TABLE avro_extschema_url
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc');
+INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url;
+SELECT * FROM avro_extschema_url;
+
+CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_literal1;
+SELECT * FROM avro_extschema_literal1;
+
+CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES ('avro.schema.url'='${system:test.tmp.dir}/table1.avsc')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';
+INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2');
+
+DESCRIBE EXTENDED avro_extschema_url1;
+SELECT * FROM avro_extschema_url1;

http://git-wip-us.apache.org/repos/asf/hive/blob/e8e3974d/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
new file mode 100644
index 0000000..8660c44
--- /dev/null
+++ b/ql/src/test/results/clientpositive/avro_tableproperty_optimize.q.out
@@ -0,0 +1,226 @@
+PREHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: CREATE TABLE avro_extschema_literal
+STORED AS AVRO
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal
+POSTHOOK: Lineage: avro_extschema_literal.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal
+col1                   string                                      
+col2                   bigint                                      
+col3                   string                                      
+                
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal
+#### A masked pattern was here ####
+s1     1       s2
+PREHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: CREATE TABLE avro_extschema_url
+STORED AS AVRO
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url
+POSTHOOK: Lineage: avro_extschema_url.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url
+col1                   string                                      
+col2                   bigint                                      
+col3                   string                                      
+                
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url
+#### A masked pattern was here ####
+s1     1       s2
+PREHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: CREATE TABLE avro_extschema_literal1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+WITH SERDEPROPERTIES (
+'avro.schema.literal'='{
+  "namespace": "org.apache.hive",
+  "name": "ext_schema",
+  "type": "record",
+  "fields": [
+    { "name":"col1", "type":"string" },
+    { "name":"col2", "type":"long" },
+    { "name":"col3", "type":"string" }
+  ] }')
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_literal1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_literal1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_literal1
+POSTHOOK: Lineage: avro_extschema_literal1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_literal1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_literal1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_literal1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_literal1
+col1                   string                                      
+col2                   bigint                                      
+col3                   string                                      
+                
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_literal1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_literal1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_literal1
+#### A masked pattern was here ####
+s1     1       s2
+PREHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: CREATE TABLE avro_extschema_url1
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+#### A masked pattern was here ####
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_extschema_url1
+PREHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@avro_extschema_url1
+POSTHOOK: query: INSERT INTO TABLE avro_extschema_url1 VALUES('s1', 1, 's2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@avro_extschema_url1
+POSTHOOK: Lineage: avro_extschema_url1.col1 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col2 SCRIPT []
+POSTHOOK: Lineage: avro_extschema_url1.col3 SCRIPT []
+PREHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_extschema_url1
+POSTHOOK: query: DESCRIBE EXTENDED avro_extschema_url1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_extschema_url1
+col1                   string                                      
+col2                   bigint                                      
+col3                   string                                      
+                
+#### A masked pattern was here ####
+PREHOOK: query: SELECT * FROM avro_extschema_url1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_extschema_url1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_extschema_url1
+#### A masked pattern was here ####
+s1     1       s2

Reply via email to