Repository: hive
Updated Branches:
  refs/heads/branch-2.0 1420e65f5 -> a73816a13
HIVE-12526 : PerfLogger for hive compiler and optimizer (Hari Subramaniyan, reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/592179c4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/592179c4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/592179c4 Branch: refs/heads/branch-2.0 Commit: 592179c40cc1953ea7ac8a8468c6a5470ffdd4cd Parents: 178a6bf Author: Hari Subramaniyan <harisan...@apache.org> Authored: Mon Dec 14 17:18:34 2015 -0800 Committer: Hari Subramaniyan <harisan...@apache.org> Committed: Fri Dec 18 12:45:07 2015 -0800 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/log/PerfLogger.java | 1 + .../ql/optimizer/BucketMapJoinOptimizer.java | 3 +- .../BucketingSortingReduceSinkOptimizer.java | 2 +- .../hadoop/hive/ql/optimizer/ColumnPruner.java | 2 +- .../hive/ql/optimizer/ConstantPropagate.java | 2 +- .../optimizer/FixedBucketPruningOptimizer.java | 2 +- .../hive/ql/optimizer/GlobalLimitOptimizer.java | 2 +- .../hive/ql/optimizer/GroupByOptimizer.java | 2 +- .../ql/optimizer/IdentityProjectRemover.java | 2 +- .../hadoop/hive/ql/optimizer/JoinReorder.java | 2 +- .../ql/optimizer/LimitPushdownOptimizer.java | 2 +- .../hive/ql/optimizer/MapJoinProcessor.java | 2 +- .../ql/optimizer/NonBlockingOpDeDupProc.java | 2 +- .../hadoop/hive/ql/optimizer/Optimizer.java | 4 +- .../ql/optimizer/PartitionColumnsSeparator.java | 2 +- .../hive/ql/optimizer/PointLookupOptimizer.java | 2 +- .../hadoop/hive/ql/optimizer/SamplePruner.java | 2 +- .../ql/optimizer/SimpleFetchAggregation.java | 2 +- .../hive/ql/optimizer/SimpleFetchOptimizer.java | 2 +- .../hive/ql/optimizer/SkewJoinOptimizer.java | 2 +- .../optimizer/SortedDynPartitionOptimizer.java | 2 +- .../SortedMergeBucketMapJoinOptimizer.java | 2 +- .../hive/ql/optimizer/StatsOptimizer.java | 2 +- .../hadoop/hive/ql/optimizer/Transform.java | 20 ++++++- .../translator/HiveOpConverterPostProc.java | 2 +- .../correlation/CorrelationOptimizer.java | 2 +- .../correlation/ReduceSinkDeDuplication.java | 2 +- .../ql/optimizer/index/RewriteGBUsingIndex.java | 2 +- .../hive/ql/optimizer/lineage/Generator.java | 2 +- .../ListBucketingPruner.java | 2 +- .../annotation/AnnotateWithOpTraits.java | 2 +- .../pcr/PartitionConditionRemover.java | 2 +- .../hive/ql/optimizer/ppr/PartitionPruner.java | 2 +- .../annotation/AnnotateWithStatistics.java | 2 +- .../ql/optimizer/unionproc/UnionProcessor.java | 2 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 57 +++++++++++++++++++- .../hadoop/hive/ql/parse/TezCompiler.java | 6 ++- .../hadoop/hive/ql/ppd/PredicatePushDown.java | 2 +- .../ql/ppd/PredicateTransitivePropagate.java | 2 +- .../hive/ql/ppd/SyntheticJoinPredicate.java | 2 +- 40 files changed, 118 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java index 1ef636c..f6eb64b 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java +++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java @@ -39,6 +39,7 @@ public class PerfLogger { public static final String COMPILE = "compile"; public static final String PARSE 
= "parse"; public static final String ANALYZE = "semanticAnalyze"; + public static final String OPTIMIZER = "optimizer"; public static final String DO_AUTHORIZATION = "doAuthorization"; public static final String DRIVER_EXECUTE = "Driver.execute"; public static final String INPUT_SUMMARY = "getInputSummary"; http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java index 750427a..a649fdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java @@ -41,7 +41,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; /** * this transformation does bucket map join optimization. */ -public class BucketMapJoinOptimizer implements Transform { +public class BucketMapJoinOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(GroupByOptimizer.class .getName()); @@ -49,7 +49,6 @@ public class BucketMapJoinOptimizer implements Transform { public BucketMapJoinOptimizer() { } - @Override public ParseContext transform(ParseContext pctx) throws SemanticException { Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java index f59ab4b..312a02c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java @@ -72,7 +72,7 @@ import org.apache.hadoop.hive.shims.ShimLoader; * where T1, T2 and T3 are bucketized/sorted on the same key 'key', we don't need a reducer * to enforce bucketing and sorting */ -public class BucketingSortingReduceSinkOptimizer implements Transform { +public class BucketingSortingReduceSinkOptimizer extends Transform { public BucketingSortingReduceSinkOptimizer() { } http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java index 561b8fc..c353e3e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPruner.java @@ -55,7 +55,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * this changes the row resolver, the tree is built again. This can be optimized * later to patch the tree. 
*/ -public class ColumnPruner implements Transform { +public class ColumnPruner extends Transform { protected ParseContext pGraphContext; /** http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java index 136b5e1..6ff151d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java @@ -59,7 +59,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * 3. Propagate expression: if the expression is an assignment like column=constant, the expression * will be propagate to parents to see if further folding operation is possible. */ -public class ConstantPropagate implements Transform { +public class ConstantPropagate extends Transform { private static final Logger LOG = LoggerFactory.getLogger(ConstantPropagate.class); protected ParseContext pGraphContext; http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java index c036db0..c63318e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FixedBucketPruningOptimizer.java @@ -60,7 +60,7 @@ import com.google.common.base.Preconditions; * Fixed bucket pruning optimizer goes through all the table scans and annotates them * with a bucketing inclusion bit-set. */ -public class FixedBucketPruningOptimizer implements Transform { +public class FixedBucketPruningOptimizer extends Transform { private static final Log LOG = LogFactory .getLog(FixedBucketPruningOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java index 6b04d92..74d6292 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java @@ -61,7 +61,7 @@ import com.google.common.collect.Multimap; * needed by this query. * This optimizer step populates the GlobalLimitCtx which is used later on to prune the inputs. 
*/ -public class GlobalLimitOptimizer implements Transform { +public class GlobalLimitOptimizer extends Transform { private final Logger LOG = LoggerFactory.getLogger(GlobalLimitOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java index fe459f7..8f3ce60 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java @@ -69,7 +69,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; * of the bucketing and sorting keys of the underlying table in the same order, the * group by can be be performed on the map-side completely. */ -public class GroupByOptimizer implements Transform { +public class GroupByOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(GroupByOptimizer.class .getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java index 114c683..48021cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java @@ -68,7 +68,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; * Note absence of select operator after filter and after join operator. * Also, see : identity_proj_remove.q */ -public class IdentityProjectRemover implements Transform { +public class IdentityProjectRemover extends Transform { private static final Logger LOG = LoggerFactory.getLogger(IdentityProjectRemover.class); @Override http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java index e31dc9b..b385c90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/JoinReorder.java @@ -37,7 +37,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; * largest tag so that they are processed last. In future, once statistics are * implemented, this transformation can also be done based on costs. 
*/ -public class JoinReorder implements Transform { +public class JoinReorder extends Transform { private final Map<Operator<?>, Integer> cache = new IdentityHashMap<Operator<?>, Integer>(); /** http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java index e850550..5a55061 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/LimitPushdownOptimizer.java @@ -85,7 +85,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * REDUCER : 10(b,d)-20(l)-30(c,k,o)-40(j,m)-50(n) * LIMIT 3 : 10(b,d)-20(l) */ -public class LimitPushdownOptimizer implements Transform { +public class LimitPushdownOptimizer extends Transform { @Override public ParseContext transform(ParseContext pctx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index b4276e4..2e3f930 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -86,7 +86,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; * the reduce sink operator above the join are converted to map sink operators. In future, once * statistics are implemented, this transformation can also be done based on costs. */ -public class MapJoinProcessor implements Transform { +public class MapJoinProcessor extends Transform { // mapjoin table descriptor contains a key descriptor which needs the field schema // (column type + column name). 
The column name is not really used anywhere, but it http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java index 2764ae1..37dbe32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/NonBlockingOpDeDupProc.java @@ -54,7 +54,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** * merges SEL-SEL or FIL-FIL into single operator */ -public class NonBlockingOpDeDupProc implements Transform { +public class NonBlockingOpDeDupProc extends Transform { @Override public ParseContext transform(ParseContext pctx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 4a7fc0d..8f48e7d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -224,7 +224,9 @@ public class Optimizer { */ public ParseContext optimize() throws SemanticException { for (Transform t : transformations) { - pctx = t.transform(pctx); + t.beginPerfLogging(); + pctx = t.transform(pctx); + t.endPerfLogging(t.toString()); } return pctx; } http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java index f71f37c..aea28e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PartitionColumnsSeparator.java @@ -62,7 +62,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; * This predicate is in turn used by the partition pruner to prune the columns that are not * part of the original IN(STRUCT(..)..) predicate. */ -public class PartitionColumnsSeparator implements Transform { +public class PartitionColumnsSeparator extends Transform { private static final Log LOG = LogFactory.getLog(PointLookupOptimizer.class); private static final String IN_UDF = http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java index a1a49cd..44972bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/PointLookupOptimizer.java @@ -64,7 +64,7 @@ import com.google.common.collect.ListMultimap; * AND operator children, the optimization might generate an IN clause that uses * structs. 
*/ -public class PointLookupOptimizer implements Transform { +public class PointLookupOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(PointLookupOptimizer.class); private static final String IN_UDF = http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java index 2c473b0..dd679db 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SamplePruner.java @@ -54,7 +54,7 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc.SampleDesc; * The transformation step that does sample pruning. * */ -public class SamplePruner implements Transform { +public class SamplePruner extends Transform { /** * SamplePrunerCtx. http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java index 39e11a2..f71fd3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java @@ -51,7 +51,7 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; // execute final aggregation stage for simple fetch query on fetch task -public class SimpleFetchAggregation implements Transform { +public class SimpleFetchAggregation extends Transform { @Override public ParseContext transform(ParseContext pctx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 9b9a5ca..d631007 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -87,7 +87,7 @@ import org.apache.hadoop.mapred.JobConf; * Tries to convert simple fetch query to single fetch task, which fetches rows directly * from location of table/partition. */ -public class SimpleFetchOptimizer implements Transform { +public class SimpleFetchOptimizer extends Transform { private final Logger LOG = LoggerFactory.getLogger(SimpleFetchOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java index 64dc48c..9525271 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SkewJoinOptimizer.java @@ -75,7 +75,7 @@ import org.slf4j.LoggerFactory; * SkewJoinOptimizer. 
* */ -public class SkewJoinOptimizer implements Transform { +public class SkewJoinOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(SkewJoinOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java index c3553a5..d4be5cf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java @@ -76,7 +76,7 @@ import com.google.common.collect.Maps; * into the destination table. This enables reducers to keep only one record writer all the time * thereby reducing the the memory pressure on the reducers. */ -public class SortedDynPartitionOptimizer implements Transform { +public class SortedDynPartitionOptimizer extends Transform { private static final String BUCKET_NUMBER_COL_NAME = "_bucket_number"; @Override http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java index 5aeeeb8..6aa5335 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedMergeBucketMapJoinOptimizer.java @@ -44,7 +44,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; //try to replace a bucket map join with a sorted merge map join -public class SortedMergeBucketMapJoinOptimizer implements Transform { +public class SortedMergeBucketMapJoinOptimizer extends Transform { private static final Logger LOG = LoggerFactory .getLogger(SortedMergeBucketMapJoinOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java index ffe706e..03c1c3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java @@ -89,7 +89,7 @@ import com.google.common.collect.Lists; * Optimizer looks at query plan to determine if it can answer query using statistics * and than change the plan to answer query entirely using statistics stored in metastore. */ -public class StatsOptimizer implements Transform { +public class StatsOptimizer extends Transform { // TODO: [HIVE-6289] while getting stats from metastore, we currently only get one col at // a time; this could be improved - get all necessary columns in advance, then use local. // TODO: [HIVE-6292] aggregations could be done directly in metastore. Hive over MySQL! 
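The Transform.java hunk that follows is the heart of this patch: Transform changes from an interface to an abstract class that carries the PerfLogger hooks, which is why the optimizers throughout this diff switch from "implements Transform" to "extends Transform". A minimal sketch of a concrete optimizer under the new contract (NoopTransform is a hypothetical name used for illustration only, not part of this commit):

----------------------------------------------------------------------
import org.apache.hadoop.hive.ql.optimizer.Transform;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;

// The subclass only implements transform(); the beginPerfLogging() and
// endPerfLogging() hooks are inherited from Transform, and
// Optimizer.optimize() invokes them around each transform() call.
public class NoopTransform extends Transform {
  @Override
  public ParseContext transform(ParseContext pctx) throws SemanticException {
    // A real optimizer would rewrite the operator tree here; this
    // illustration returns the parse context unchanged.
    return pctx;
  }
}
----------------------------------------------------------------------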
http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java index 9f957da..9ce9478 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Transform.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.optimizer; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.session.SessionState; /** * Optimizer interface. All the rule-based optimizations implement this @@ -27,7 +29,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * current parse context (which contains the operator tree among other things), * perform all the optimizations, and then return the updated parse context. */ -public interface Transform { +public abstract class Transform { /** * All transformation steps implement this interface. * @@ -36,5 +38,19 @@ public interface Transform { * @return ParseContext * @throws SemanticException */ - ParseContext transform(ParseContext pctx) throws SemanticException; + public abstract ParseContext transform(ParseContext pctx) throws SemanticException; + + public void beginPerfLogging() { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } + + public void endPerfLogging() { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER); + } + public void endPerfLogging(String additionalInfo) { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, additionalInfo); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java index 1d0a254..fc6b99d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverterPostProc.java @@ -46,7 +46,7 @@ import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; -public class HiveOpConverterPostProc implements Transform { +public class HiveOpConverterPostProc extends Transform { private static final Logger LOG = LoggerFactory.getLogger(HiveOpConverterPostProc.class); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java index 1b5d921..e1927e9 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java @@ -82,7 +82,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; * Test queries associated with this optimizer are correlationoptimizer1.q to * correlationoptimizer14.q */ -public class CorrelationOptimizer implements Transform { +public class CorrelationOptimizer extends Transform { private static final Logger LOG = LoggerFactory.getLogger(CorrelationOptimizer.class.getName()); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 583e72f..8da9ad5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -62,7 +62,7 @@ import com.google.common.collect.Lists; * * This optimizer removes/replaces child-RS (not parent) which is safer way for DefaultGraphWalker. */ -public class ReduceSinkDeDuplication implements Transform { +public class ReduceSinkDeDuplication extends Transform { private static final String RS = ReduceSinkOperator.getOperatorName(); private static final String GBY = GroupByOperator.getOperatorName(); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java index ea1ece6..e59127f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/index/RewriteGBUsingIndex.java @@ -89,7 +89,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; * For test cases, @see ql_rewrite_gbtoidx.q */ -public class RewriteGBUsingIndex implements Transform { +public class RewriteGBUsingIndex extends Transform { private ParseContext parseContext; private Hive hiveDb; private HiveConf hiveConf; http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java index 82e26d9..0c2ff32 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java @@ -53,7 +53,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; * and tables from the plan before it goes through other * optimization phases. 
*/ -public class Generator implements Transform { +public class Generator extends Transform { /* (non-Javadoc) * @see org.apache.hadoop.hive.ql.optimizer.Transform#transform(org.apache.hadoop.hive.ql.parse.ParseContext) http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPruner.java index f399a20..1965120 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/listbucketingpruner/ListBucketingPruner.java @@ -41,7 +41,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; * The transformation step that does list bucketing pruning. * */ -public class ListBucketingPruner implements Transform { +public class ListBucketingPruner extends Transform { static final Logger LOG = LoggerFactory.getLogger(ListBucketingPruner.class.getName()); /* http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java index 0398115..b32d730 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java @@ -49,7 +49,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * This class annotates each operator with its traits. The OpTraits class * specifies the traits that are populated for each operator. */ -public class AnnotateWithOpTraits implements Transform { +public class AnnotateWithOpTraits extends Transform { @Override public ParseContext transform(ParseContext pctx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PartitionConditionRemover.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PartitionConditionRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PartitionConditionRemover.java index 812ca51..7baede9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PartitionConditionRemover.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PartitionConditionRemover.java @@ -44,7 +44,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * The transformation step that does partition condition remover. 
* */ -public class PartitionConditionRemover implements Transform { +public class PartitionConditionRemover extends Transform { // The log private static final Logger LOG = LoggerFactory http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index d99362a..90e9b3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -70,7 +70,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; * The transformation step that does partition pruning. * */ -public class PartitionPruner implements Transform { +public class PartitionPruner extends Transform { // The log public static final String CLASS_NAME = PartitionPruner.class.getName(); http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java index c8b3545..f0c1e86 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; -public class AnnotateWithStatistics implements Transform { +public class AnnotateWithStatistics extends Transform { @Override public ParseContext transform(ParseContext pctx) throws SemanticException { http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java index 547b486..9a53e0c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java @@ -50,7 +50,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * * The parse Context is not changed. */ -public class UnionProcessor implements Transform { +public class UnionProcessor extends Transform { /** * empty constructor. 
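The CalcitePlanner changes below sit outside the Optimizer transformation loop, so each Calcite phase is bracketed by hand with the same PerfLogger.OPTIMIZER key. The recurring pattern, reduced to a skeleton (SomeRule.INSTANCE and the step label are placeholders; hepPlan is CalcitePlanner's own helper, used here with the argument shape visible in this diff):

----------------------------------------------------------------------
PerfLogger perfLogger = SessionState.getPerfLogger();

perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
basePlan = hepPlan(basePlan, true, mdProvider, SomeRule.INSTANCE);
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
    "Calcite: <description of this step>");
----------------------------------------------------------------------

Because every phase reuses the single OPTIMIZER key, it is the additionalInfo string passed to PerfLogEnd that tells the individual Calcite steps apart in the resulting log.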
http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 131414c..9629fb5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -109,6 +109,7 @@ import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; @@ -182,6 +183,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.serdeConstants; @@ -853,7 +855,10 @@ public class CalcitePlanner extends SemanticAnalyzer { this.cluster = cluster; this.relOptSchema = relOptSchema; + PerfLogger perfLogger = SessionState.getPerfLogger(); + // 1. Gen Calcite Plan + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); try { calciteGenPlan = genLogicalPlan(getQB(), true); resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema( @@ -863,6 +868,7 @@ public class CalcitePlanner extends SemanticAnalyzer { semanticException = e; throw new RuntimeException(e); } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Plan generation"); // Create MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); @@ -875,6 +881,7 @@ public class CalcitePlanner extends SemanticAnalyzer { // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); try { List<RelMetadataProvider> list = Lists.newArrayList(); list.add(mdProvider.getMetadataProvider()); @@ -912,22 +919,26 @@ public class CalcitePlanner extends SemanticAnalyzer { throw e; } } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering"); } else { calciteOptimizedPlan = calcitePreCboPlan; disableSemJoinReordering = false; } // 4. Run other optimizations that do not need stats + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, UnionMergeRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_PROJECT_FACTORY), HiveAggregateProjectMergeRule.INSTANCE, HiveJoinCommuteRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats"); // 5. 
Run aggregate-join transpose (cost based) // If it failed because of missing stats, we continue with // the rest of optimizations if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); try { HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE); @@ -953,17 +964,21 @@ public class CalcitePlanner extends SemanticAnalyzer { throw e; } } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Aggregate join transpose"); } // 7. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule"); } // 8. Run rules to aid in translation from Calcite tree to Hive tree if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // 8.1. Merge join into multijoin operators (if possible) calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, @@ -983,6 +998,7 @@ public class CalcitePlanner extends SemanticAnalyzer { calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Translation from Calcite tree to Hive tree"); } if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { @@ -1011,14 +1027,19 @@ public class CalcitePlanner extends SemanticAnalyzer { // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. + PerfLogger perfLogger = SessionState.getPerfLogger(); + //1. Distinct aggregate rewrite // Run this optimization early, since it is expanding the operator pipeline. if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // Its not clear, if this rewrite is always performant on MR, since extra map phase // introduced for 2nd MR job may offset gains of this multi-stage aggregation. // We need a cost model for MR to enable this on MR. basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); } // 2. 
Try factoring out common filter elements & separating deterministic @@ -1026,14 +1047,18 @@ public class CalcitePlanner extends SemanticAnalyzer { // add on-clauses for old style Join Syntax // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, HepMatchOrder.ARBITRARY, HivePreFilteringRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); // 3. PPD for old Join Syntax // NOTE: PPD needs to run before adding not null filters in order to // support old style join syntax (so that on-clauses will get filled up). // TODO: Add in ReduceExpressionrules (Constant folding) to below once // HIVE-11927 is fixed. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( @@ -1041,6 +1066,8 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( HiveRelFactories.HIVE_FILTER_FACTORY)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, PPD for old join syntax"); // TODO: Transitive inference, constant prop & Predicate push down has to // do multiple passes till no more inference is left @@ -1048,14 +1075,19 @@ public class CalcitePlanner extends SemanticAnalyzer { // pred is present below may not be sufficient as inferred & pushed pred // could have been mutated by constant folding/prop // 4. Transitive inference for join on clauses + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, new HiveJoinPushTransitivePredicatesRule( Join.class, HiveRelFactories.HIVE_FILTER_FACTORY)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Transitive inference for join on clauses"); + // 5. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable final float reductionProportion = HiveConf.getFloatVar(conf, @@ -1067,14 +1099,20 @@ public class CalcitePlanner extends SemanticAnalyzer { basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP, new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } // 6. 
Add not null filters + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Add not null filters"); // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference // TODO: Add in ReduceExpressionrules (Constant folding) to below once // HIVE-11927 is fixed. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( @@ -1082,31 +1120,48 @@ public class CalcitePlanner extends SemanticAnalyzer { HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( HiveRelFactories.HIVE_FILTER_FACTORY)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Constant propagation and PPD"); // 8. Push Down Semi Joins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); // 9. Apply Partition Pruning + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, new HivePartitionPruneRule(conf)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Partition Pruning"); // 10. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveRelFactories.HIVE_BUILDER.create(cluster, null)); basePlan = fieldTrimmer.trim(basePlan); - + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Projection Pruning"); // 11. Merge Project-Project if possible + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, new ProjectMergeRule(true, HiveRelFactories.HIVE_PROJECT_FACTORY)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Merge Project-Project"); + // 12. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). 
This only // matches FIL-PROJ-TS + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTSTransposeRule( Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Rerun PPD"); return basePlan; } http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 147ff7e..ff971ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; @@ -86,6 +87,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.TezWork; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; /** @@ -112,7 +114,8 @@ public class TezCompiler extends TaskCompiler { @Override protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs, Set<WriteEntity> outputs) throws SemanticException { - + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // Create the context for the walker OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx, inputs, outputs); @@ -128,6 +131,7 @@ public class TezCompiler extends TaskCompiler { // after the stats phase we might have some cyclic dependencies that we need // to take care of. runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Tez compiler"); } http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java index 2312798..23fa2aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java @@ -79,7 +79,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; * etc) change the columns as data flows through them. In such cases the column * references are replaced by the corresponding expression in the input data. 
*/ -public class PredicatePushDown implements Transform { +public class PredicatePushDown extends Transform { private static final Logger LOG = LoggerFactory.getLogger(PredicatePushDown.class); private ParseContext pGraphContext; http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java index fb76d5d..b091ebc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java @@ -56,7 +56,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** * propagates filters to other aliases based on join condition */ -public class PredicateTransitivePropagate implements Transform { +public class PredicateTransitivePropagate extends Transform { private ParseContext pGraphContext; http://git-wip-us.apache.org/repos/asf/hive/blob/592179c4/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index 7aa91f2..accfb3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -60,7 +60,7 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** * creates synthetic predicates that represent "IN (keylist other table)" */ -public class SyntheticJoinPredicate implements Transform { +public class SyntheticJoinPredicate extends Transform { private static transient Logger LOG = LoggerFactory.getLogger(SyntheticJoinPredicate.class.getName());
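One detail worth noting about the Optimizer.java change earlier in this diff: optimize() labels each measurement with t.toString(), and since Transform does not override toString(), the label falls back to Object's default (class name plus hash code). That is still enough to identify the optimizer class, but a subclass wanting a cleaner label could override toString() — a hypothetical refinement, not part of this commit:

----------------------------------------------------------------------
// Hypothetical addition to a Transform subclass: a stable, readable
// label for the entry Optimizer.optimize() writes via
// t.endPerfLogging(t.toString()).
@Override
public String toString() {
  return getClass().getSimpleName(); // e.g. "ColumnPruner" rather than
                                     // "...ColumnPruner@1f2a3b4c"
}
----------------------------------------------------------------------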