[2/2] hive git commit: HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)
HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e10b88c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e10b88c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e10b88c Branch: refs/heads/branch-2.2 Commit: 9e10b88c33a3ae9cb09cb230c4ae09e442203ba9 Parents: a20e63e Author: Aihua Xu Authored: Thu Mar 8 11:33:37 2018 -0800 Committer: Aihua Xu Committed: Mon Mar 12 15:01:25 2018 -0700 -- .../TablePropertyEnrichmentOptimizer.java | 45 +++- .../avro_tableproperty_optimize.q | 63 ++ .../avro_tableproperty_optimize.q.out | 226 +++ 3 files changed, 324 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/9e10b88c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java index 5824490..154eb02 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java @@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.Deserializer; +import 
org.apache.hive.common.util.ReflectionUtil; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform { } } + /** + * Retrieves the table properties as well as the properties from Serde. + */ + private static Map getTableParameters(Table table) { +Map originalTableParameters = new HashMap<>(table.getParameters()); +Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); +for (String property : tableMetadata.stringPropertyNames()) { + if (!originalTableParameters.containsKey(property)) { +originalTableParameters.put(property, tableMetadata.getProperty(property)); + } +} +return originalTableParameters; + } + private static class Processor implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsOp = (TableScanOperator) nd; WalkerCtx context = (WalkerCtx)procCtx; - TableScanDesc tableScanDesc = tsOp.getConf(); Table table = tsOp.getConf().getTableMetadata().getTTable(); - Map tableParameters = table.getParameters(); - Properties tableProperties = new Properties(); - tableProperties.putAll(tableParameters); - Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer(); - String deserializerClassName = deserializer.getClass().getName(); + Map originalTableParameters = getTableParameters(table); + if (LOG.isDebugEnabled()) { +LOG.debug("Original Table parameters: " + originalTableParameters); + } + Properties clonedTableParameters = new Properties(); + clonedTableParameters.putAll(originalTableParameters); + + String deserializerClassName = null; try { +deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib(); +Deserializer deserializer = ReflectionUtil.newInstance( +context.conf.getClassByName(deserializerClassName) 
+.asSubclass(Deserializer.class), +context.conf); + if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) { - deserializer.initialize(context.conf, tableProperties); + deserializer.initialize(context.conf, clonedTableParameters); LOG.debug("SerDe init succeeded for class: " + deserializerClassName); - for (Map.Entry property
[2/2] hive git commit: HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum)
HIVE-14792: AvroSerde reads the remote schema-file at least once per mapper, per table reference. (Addendum) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5949479f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5949479f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5949479f Branch: refs/heads/branch-2.2 Commit: 5949479f7e08987b67c4ee86c06c2d5949f75bee Parents: 220d199 Author: Aihua Xu Authored: Thu Mar 8 11:33:37 2018 -0800 Committer: Aihua Xu Committed: Mon Mar 12 14:52:53 2018 -0700 -- .../TablePropertyEnrichmentOptimizer.java | 45 +++- .../avro_tableproperty_optimize.q | 63 ++ .../avro_tableproperty_optimize.q.out | 226 +++ 3 files changed, 324 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/5949479f/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java index 5824490..d313f7d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TablePropertyEnrichmentOptimizer.java @@ -26,6 +26,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -40,8 +41,10 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.serde2.Deserializer; 
+import org.apache.hive.common.util.ReflectionUtil; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; @@ -78,29 +81,51 @@ class TablePropertyEnrichmentOptimizer extends Transform { } } + /** + * Retrieves the table properties as well as the properties from Serde. + */ + private static Map getTableParameters(Table table) { +Map originalTableParameters = new HashMap<>(table.getParameters()); +Properties tableMetadata = MetaStoreUtils.getTableMetadata(table); +for (String property : tableMetadata.stringPropertyNames()) { + if (!originalTableParameters.containsKey(property)) { +originalTableParameters.put(property, tableMetadata.getProperty(property)); + } +} +return originalTableParameters; + } + private static class Processor implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { TableScanOperator tsOp = (TableScanOperator) nd; WalkerCtx context = (WalkerCtx)procCtx; - TableScanDesc tableScanDesc = tsOp.getConf(); Table table = tsOp.getConf().getTableMetadata().getTTable(); - Map tableParameters = table.getParameters(); - Properties tableProperties = new Properties(); - tableProperties.putAll(tableParameters); - Deserializer deserializer = tableScanDesc.getTableMetadata().getDeserializer(); - String deserializerClassName = deserializer.getClass().getName(); + Map originalTableParameters = getTableParameters(table); + if (LOG.isDebugEnabled()) { +LOG.debug("Original Table parameters: " + originalTableParameters); + } + Properties clonedTableParameters = new Properties(); + clonedTableParameters.putAll(originalTableParameters); + + String deserializerClassName = null; try { +deserializerClassName = tableScanDesc.getTableMetadata().getSd().getSerdeInfo().getSerializationLib(); +Deserializer deserializer = ReflectionUtil.newInstance( +context.conf.getClassByName(deserializerClassName) 
+.asSubclass(Deserializer.class), +context.conf); + if (context.serdeClassesUnderConsideration.contains(deserializerClassName)) { - deserializer.initialize(context.conf, tableProperties); + deserializer.initialize(context.conf, clonedTableParameters); LOG.debug("SerDe init succeeded for class: " + deserializerClassName); - for (Map.Entry