Repository: hive Updated Branches: refs/heads/master fb675ce74 -> 1e884cc5f
HIVE-18995: Vectorization: Add option to suppress "Execution mode: vectorized" for testing purposes (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1e884cc5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1e884cc5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1e884cc5 Branch: refs/heads/master Commit: 1e884cc5f5e8d2fa966a604066ac059022ca5649 Parents: fb675ce Author: Matt McCline <mmccl...@hortonworks.com> Authored: Tue Mar 27 02:28:01 2018 -0500 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Tue Mar 27 02:28:01 2018 -0500 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 5 + .../optimizer/physical/PhysicalOptimizer.java | 5 +- .../hive/ql/optimizer/physical/Vectorizer.java | 234 ++++++++++++------- .../apache/hadoop/hive/ql/plan/BaseWork.java | 141 +++++++---- .../org/apache/hadoop/hive/ql/plan/MapWork.java | 4 +- .../hadoop/hive/ql/plan/MergeJoinWork.java | 2 +- .../apache/hadoop/hive/ql/plan/ReduceWork.java | 16 +- 7 files changed, 268 insertions(+), 139 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 8d9b5a3..5888eaa 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2999,6 +2999,11 @@ public class HiveConf extends Configuration { "internal use only, used to override the hive.vectorized.execution.enabled setting and\n" + "turn off vectorization. The default is false, of course", true), + HIVE_TEST_VECTORIZATION_SUPPRESS_EXPLAIN_EXECUTION_MODE( + "hive.test.vectorization.suppress.explain.execution.mode", false, + "internal use only, used to suppress \"Execution mode: vectorized\" EXPLAIN display.\n" + + "The default is false, of course", + true), HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " + "whether to check, convert, and normalize partition value to conform to its column type in " http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java index 0f3c5f2..d508d02 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java @@ -85,7 +85,10 @@ public class PhysicalOptimizer { // Vectorization should be the last optimization, because it doesn't modify the plan // or any operators. It makes a very low level transformation to the expressions to // run in the vectorized mode. - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) { + if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED) || + HiveConf.getVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_ENABLED_OVERRIDE).equalsIgnoreCase( + "enable")) { resolvers.add(new Vectorizer()); } if (!"none".equalsIgnoreCase(hiveConf.getVar(HiveConf.ConfVars.HIVESTAGEIDREARRANGE))) { http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13a2fc4..0c94688 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -231,6 +231,7 @@ import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hive.common.util.AnnotationUtils; import org.apache.hadoop.util.ReflectionUtils; + import com.google.common.collect.ImmutableSet; import com.google.common.base.Preconditions; @@ -300,6 +301,7 @@ public class Vectorizer implements PhysicalPlanResolver { boolean isVectorizationEnabled; private VectorizationEnabledOverride vectorizationEnabledOverride; + boolean isTestForcedVectorizationEnable; private boolean useVectorizedInputFileFormat; private boolean useVectorDeserialize; @@ -336,6 +338,8 @@ public class Vectorizer implements PhysicalPlanResolver { supportedAcidInputFormats.add(OneNullRowInputFormat.class.getName()); } + private boolean isTestVectorizationSuppressExplainExecutionMode; + private BaseWork currentBaseWork; private Operator<? extends OperatorDesc> currentOperator; private Collection<Class<?>> vectorizedInputFormatExcludes; @@ -651,6 +655,10 @@ public class Vectorizer implements PhysicalPlanResolver { baseWork.setAllNative(allNative); baseWork.setUsesVectorUDFAdaptor(usesVectorUDFAdaptor); + + baseWork.setIsTestForcedVectorizationEnable(isTestForcedVectorizationEnable); + baseWork.setIsTestVectorizationSuppressExplainExecutionMode( + isTestVectorizationSuppressExplainExecutionMode); } } @@ -923,19 +931,30 @@ public class Vectorizer implements PhysicalPlanResolver { Task<? extends Serializable> currTask = (Task<? extends Serializable>) nd; if (currTask instanceof MapRedTask) { MapredWork mapredWork = ((MapRedTask) currTask).getWork(); - convertMapWork(mapredWork.getMapWork(), false); + + MapWork mapWork = mapredWork.getMapWork(); + setMapWorkExplainConditions(mapWork); + convertMapWork(mapredWork.getMapWork(), /* isTezOrSpark */ false); + logMapWorkExplainVectorization(mapWork); + ReduceWork reduceWork = mapredWork.getReduceWork(); if (reduceWork != null) { + // Always set the EXPLAIN conditions. setReduceWorkExplainConditions(reduceWork); // We do not vectorize MR Reduce. + + logReduceWorkExplainVectorization(reduceWork); } } else if (currTask instanceof TezTask) { TezWork work = ((TezTask) currTask).getWork(); for (BaseWork baseWork: work.getAllWork()) { if (baseWork instanceof MapWork) { - convertMapWork((MapWork) baseWork, true); + MapWork mapWork = (MapWork) baseWork; + setMapWorkExplainConditions(mapWork); + convertMapWork(mapWork, /* isTezOrSpark */ true); + logMapWorkExplainVectorization(mapWork); } else if (baseWork instanceof ReduceWork) { ReduceWork reduceWork = (ReduceWork) baseWork; @@ -946,13 +965,17 @@ public class Vectorizer implements PhysicalPlanResolver { if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + logReduceWorkExplainVectorization(reduceWork); } } } else if (currTask instanceof SparkTask) { SparkWork sparkWork = (SparkWork) currTask.getWork(); for (BaseWork baseWork : sparkWork.getAllWork()) { if (baseWork instanceof MapWork) { - convertMapWork((MapWork) baseWork, true); + MapWork mapWork = (MapWork) baseWork; + setMapWorkExplainConditions(mapWork); + convertMapWork(mapWork, /* isTezOrSpark */ true); + logMapWorkExplainVectorization(mapWork); } else if (baseWork instanceof ReduceWork) { ReduceWork reduceWork = (ReduceWork) baseWork; @@ -962,37 +985,125 @@ public class Vectorizer implements PhysicalPlanResolver { if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + logReduceWorkExplainVectorization(reduceWork); } } + } else if (currTask instanceof FetchTask) { + LOG.info("Vectorizing Fetch not supported"); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Ignoring vectorization of " + currTask.getClass().getSimpleName()); + } } return null; } - private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws SemanticException { - - mapWork.setVectorizationExamined(true); + private void setExplainConditions(BaseWork baseWork) { // Global used when setting errors, etc. - currentBaseWork = mapWork; + currentBaseWork = baseWork; - VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); - vectorTaskColumnInfo.assume(); + baseWork.setVectorizedVertexNum(++vectorizedVertexNum); + baseWork.setVectorizationExamined(true); + } - mapWork.setVectorizedVertexNum(++vectorizedVertexNum); + private void setMapWorkExplainConditions(MapWork mapWork) { + setExplainConditions(mapWork); + } - if (!validateAndVectorizeMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark)) { - if (currentBaseWork.getVectorizationEnabled()) { - VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); - if (notVectorizedReason == null) { - LOG.info("Cannot vectorize: unknown"); - } else { - LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); - } + private void setReduceWorkExplainConditions(ReduceWork reduceWork) { + + setExplainConditions(reduceWork); + + reduceWork.setReduceVectorizationEnabled(isReduceVectorizationEnabled); + reduceWork.setVectorReduceEngine( + HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); + } + + private boolean logExplainVectorization(BaseWork baseWork, String name) { + + if (!baseWork.getVectorizationExamined()) { + return false; + } + + LOG.info(name + " vectorization enabled: " + baseWork.getVectorizationEnabled()); + boolean isVectorized = baseWork.getVectorMode(); + LOG.info(name + " vectorized: " + isVectorized); + if (!isVectorized) { + VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason(); + if (notVectorizedReason != null) { + LOG.info(name + " notVectorizedReason: " + notVectorizedReason.toString()); + } + } + LOG.info(name + " vectorizedVertexNum: " + baseWork.getVectorizedVertexNum()); + + if (LOG.isDebugEnabled() && isVectorized) { + VectorizedRowBatchCtx batchContext = baseWork.getVectorizedRowBatchCtx(); + LOG.debug(name + " dataColumnCount: " + batchContext.getDataColumnCount()); + int[] dataColumnNums = batchContext.getDataColumnNums(); + if (dataColumnNums != null) { + LOG.debug(name + " includeColumns: " + Arrays.toString(dataColumnNums)); } + LOG.debug(name + " partitionColumnCount: " + batchContext.getPartitionColumnCount()); + LOG.debug(name + " dataColumns: " + + BaseWork.BaseExplainVectorization.getColumns( + batchContext, 0, batchContext.getDataColumnCount())); + LOG.debug(name + " scratchColumnTypeNames: " + + BaseWork.BaseExplainVectorization.getScratchColumns(batchContext)); + VirtualColumn[] neededVirtualColumns = batchContext.getNeededVirtualColumns(); + if (neededVirtualColumns != null && neededVirtualColumns.length != 0) { + LOG.debug(name + " neededVirtualColumns: " + Arrays.toString(neededVirtualColumns)); + } + } + return true; + } + + private void logMapWorkExplainVectorization(MapWork mapWork) { + + if (!logExplainVectorization(mapWork, "Map")) { + return; + } + + // Conditions. + List<String> enabledConditionsMet = mapWork.getVectorizationEnabledConditionsMet(); + if (enabledConditionsMet != null && !enabledConditionsMet.isEmpty()) { + LOG.info("Map enabledConditionsMet: " + enabledConditionsMet.toString()); + } + List<String> enabledConditionsNotMet = mapWork.getVectorizationEnabledConditionsNotMet(); + if (enabledConditionsNotMet != null && !enabledConditionsNotMet.isEmpty()) { + LOG.info("Map enabledConditionsNotMet: " + enabledConditionsNotMet.toString()); + } + Set<String> inputFileFormatClassNameSet = + mapWork.getVectorizationInputFileFormatClassNameSet(); + if (inputFileFormatClassNameSet != null && !inputFileFormatClassNameSet.isEmpty()) { + LOG.info("Map inputFileFormatClassNameSet: " + inputFileFormatClassNameSet.toString()); } } + private void logReduceWorkExplainVectorization(ReduceWork reduceWork) { + + if (!logExplainVectorization(reduceWork, "Reduce")) { + return; + } + + // Conditions. + LOG.info("Reducer " + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED.varname + + ": " + reduceWork.getReduceVectorizationEnabled()); + LOG.info("Reducer engine: " + reduceWork.getVectorReduceEngine()); + } + + private void convertMapWork(MapWork mapWork, boolean isTezOrSpark) throws SemanticException { + + // We have to evaluate the input format to see if vectorization is enabled, so + // we do not set it right here. + + VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); + vectorTaskColumnInfo.assume(); + + validateAndVectorizeMapWork(mapWork, vectorTaskColumnInfo, isTezOrSpark); + } + private void addMapWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) { opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + ".*" + FileSinkOperator.getOperatorName()), np); @@ -1595,7 +1706,7 @@ public class Vectorizer implements PhysicalPlanResolver { return new ImmutablePair<Boolean,Boolean>(true, false); } - private boolean validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, + private void validateAndVectorizeMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTezOrSpark) throws SemanticException { //-------------------------------------------------------------------------------------------- @@ -1607,7 +1718,7 @@ public class Vectorizer implements PhysicalPlanResolver { VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); Preconditions.checkState(notVectorizedReason != null); mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); - return false; + return; } String alias = onlyOneTableScanPair.left; TableScanOperator tableScanOperator = onlyOneTableScanPair.right; @@ -1625,7 +1736,7 @@ public class Vectorizer implements PhysicalPlanResolver { Preconditions.checkState(notVectorizedReason != null); mapWork.setVectorizationEnabledConditionsNotMet(Arrays.asList(new String[] {notVectorizedReason.toString()})); } - return false; + return; } final int dataColumnCount = @@ -1705,7 +1816,7 @@ public class Vectorizer implements PhysicalPlanResolver { * Validate and vectorize the Map operator tree. */ if (!validateAndVectorizeMapOperators(mapWork, tableScanOperator, isTezOrSpark, vectorTaskColumnInfo)) { - return false; + return; } //-------------------------------------------------------------------------------------------- @@ -1714,11 +1825,7 @@ public class Vectorizer implements PhysicalPlanResolver { mapWork.setVectorMode(true); - if (LOG.isDebugEnabled()) { - debugDisplayVertexInfo(mapWork); - } - - return true; + return; } private boolean validateAndVectorizeMapOperators(MapWork mapWork, TableScanOperator tableScanOperator, @@ -1859,47 +1966,26 @@ public class Vectorizer implements PhysicalPlanResolver { return newChildren; } - private void setReduceWorkExplainConditions(ReduceWork reduceWork) { - - reduceWork.setVectorizationExamined(true); - - reduceWork.setReduceVectorizationEnabled(isReduceVectorizationEnabled); - reduceWork.setVectorReduceEngine( - HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE)); - } - private void convertReduceWork(ReduceWork reduceWork) throws SemanticException { - // Global used when setting errors, etc. - currentBaseWork = reduceWork; - currentBaseWork.setVectorizationEnabled(true); + reduceWork.setVectorizationEnabled(true); VectorTaskColumnInfo vectorTaskColumnInfo = new VectorTaskColumnInfo(); vectorTaskColumnInfo.assume(); - reduceWork.setVectorizedVertexNum(++vectorizedVertexNum); reduceWork.setVectorizedTestingReducerBatchSize(vectorizedTestingReducerBatchSize); - if (!validateAndVectorizeReduceWork(reduceWork, vectorTaskColumnInfo)) { - if (currentBaseWork.getVectorizationEnabled()) { - VectorizerReason notVectorizedReason = currentBaseWork.getNotVectorizedReason(); - if (notVectorizedReason == null) { - LOG.info("Cannot vectorize: unknown"); - } else { - LOG.info("Cannot vectorize: " + notVectorizedReason.toString()); - } - } - } + validateAndVectorizeReduceWork(reduceWork, vectorTaskColumnInfo); } - private boolean validateAndVectorizeReduceWork(ReduceWork reduceWork, + private void validateAndVectorizeReduceWork(ReduceWork reduceWork, VectorTaskColumnInfo vectorTaskColumnInfo) throws SemanticException { Operator<? extends OperatorDesc> reducer = reduceWork.getReducer(); // Validate input to ReduceWork. if (!getOnlyStructObjectInspectors(reduceWork, vectorTaskColumnInfo)) { - return false; + return; } //-------------------------------------------------------------------------------------------- @@ -1908,7 +1994,7 @@ public class Vectorizer implements PhysicalPlanResolver { * Validate and vectorize the Reduce operator tree. */ if (!validateAndVectorizeReduceOperators(reduceWork, vectorTaskColumnInfo)) { - return false; + return; } //-------------------------------------------------------------------------------------------- @@ -1917,11 +2003,7 @@ public class Vectorizer implements PhysicalPlanResolver { reduceWork.setVectorMode(true); - if (LOG.isDebugEnabled()) { - debugDisplayVertexInfo(reduceWork); - } - - return true; + return; } private boolean validateAndVectorizeReduceOperators(ReduceWork reduceWork, @@ -2073,6 +2155,7 @@ public class Vectorizer implements PhysicalPlanResolver { HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); final boolean weCanAttemptVectorization; + isTestForcedVectorizationEnable = false; switch (vectorizationEnabledOverride) { case NONE: weCanAttemptVectorization = isVectorizationEnabled; @@ -2082,6 +2165,12 @@ public class Vectorizer implements PhysicalPlanResolver { break; case ENABLE: weCanAttemptVectorization = true; + isTestForcedVectorizationEnable = !isVectorizationEnabled; + + // Different parts of the code rely on this being set... + HiveConf.setBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); + isVectorizationEnabled = true; break; default: throw new RuntimeException("Unexpected vectorization enabled override " + @@ -2162,6 +2251,10 @@ public class Vectorizer implements PhysicalPlanResolver { hiveVectorAdaptorUsageMode = HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf); + isTestVectorizationSuppressExplainExecutionMode = + HiveConf.getBoolVar(hiveConf, + HiveConf.ConfVars.HIVE_TEST_VECTORIZATION_SUPPRESS_EXPLAIN_EXECUTION_MODE); + // create dispatcher and graph walker Dispatcher disp = new VectorizationDispatcher(); TaskGraphWalker ogw = new TaskGraphWalker(disp); @@ -2240,7 +2333,7 @@ public class Vectorizer implements PhysicalPlanResolver { return false; } if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) { - LOG.info("Cannot vectorize join with complex ON clause"); + setOperatorIssue("Non-equi joins not supported"); return false; } return true; @@ -4778,27 +4871,4 @@ public class Vectorizer implements PhysicalPlanResolver { return vectorOp; } - - public void debugDisplayVertexInfo(BaseWork work) { - - VectorizedRowBatchCtx vectorizedRowBatchCtx = work.getVectorizedRowBatchCtx(); - - String[] allColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - TypeInfo[] columnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); - DataTypePhysicalVariation[] dataTypePhysicalVariations = vectorizedRowBatchCtx.getRowdataTypePhysicalVariations(); - int partitionColumnCount = vectorizedRowBatchCtx.getPartitionColumnCount(); - int virtualColumnCount = vectorizedRowBatchCtx.getVirtualColumnCount(); - String[] scratchColumnTypeNames =vectorizedRowBatchCtx.getScratchColumnTypeNames(); - DataTypePhysicalVariation[] scratchdataTypePhysicalVariations = vectorizedRowBatchCtx.getScratchDataTypePhysicalVariations(); - - LOG.debug("debugDisplayVertexInfo rowColumnNames " + Arrays.toString(allColumnNames)); - LOG.debug("debugDisplayVertexInfo rowColumnTypeInfos " + Arrays.toString(columnTypeInfos)); - LOG.debug("debugDisplayVertexInfo rowDataTypePhysicalVariations " + - (dataTypePhysicalVariations == null ? "NULL" : Arrays.toString(dataTypePhysicalVariations))); - LOG.debug("debugDisplayVertexInfo partitionColumnCount " + partitionColumnCount); - LOG.debug("debugDisplayVertexInfo virtualColumnCount " + virtualColumnCount); - LOG.debug("debugDisplayVertexInfo scratchColumnTypeNames " + Arrays.toString(scratchColumnTypeNames)); - LOG.debug("debugDisplayVertexInfo scratchdataTypePhysicalVariations " + - (scratchdataTypePhysicalVariations == null ? "NULL" : Arrays.toString(scratchdataTypePhysicalVariations))); - } } http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java index dc3219b..31f54c3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java @@ -99,6 +99,9 @@ public abstract class BaseWork extends AbstractOperatorDesc { protected long vectorizedVertexNum; protected int vectorizedTestingReducerBatchSize; + private boolean isTestForcedVectorizationEnable; + private boolean isTestVectorizationSuppressExplainExecutionMode; + protected boolean llapMode = false; protected boolean uberMode = false; @@ -262,6 +265,24 @@ public abstract class BaseWork extends AbstractOperatorDesc { return allNative; } + public void setIsTestForcedVectorizationEnable(boolean isTestForcedVectorizationEnable) { + this.isTestForcedVectorizationEnable = isTestForcedVectorizationEnable; + } + + public boolean getIsTestForcedVectorizationEnable() { + return isTestForcedVectorizationEnable; + } + + public void setIsTestVectorizationSuppressExplainExecutionMode( + boolean isTestVectorizationSuppressExplainExecutionMode) { + this.isTestVectorizationSuppressExplainExecutionMode = + isTestVectorizationSuppressExplainExecutionMode; + } + + public boolean getIsTestVectorizationSuppressExplainExecutionMode() { + return isTestVectorizationSuppressExplainExecutionMode; + } + public static class BaseExplainVectorization { private final BaseWork baseWork; @@ -287,12 +308,14 @@ public abstract class BaseWork extends AbstractOperatorDesc { return result; } - @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public boolean enabled() { return baseWork.getVectorizationEnabled(); } - @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public Boolean vectorized() { if (!baseWork.getVectorizationEnabled()) { return null; @@ -300,7 +323,8 @@ public abstract class BaseWork extends AbstractOperatorDesc { return baseWork.getVectorMode(); } - @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String notVectorizedReason() { if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) { return null; @@ -312,7 +336,8 @@ public abstract class BaseWork extends AbstractOperatorDesc { return notVectorizedReason.toString(); } - @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public Boolean nativeVectorized() { if (!baseWork.getVectorMode()) { return null; @@ -320,7 +345,45 @@ public abstract class BaseWork extends AbstractOperatorDesc { return baseWork.getAllNative(); } - @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public static List<String> getColumns(VectorizedRowBatchCtx vectorizedRowBatchCtx, + int startIndex, int count) { + String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); + TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); + DataTypePhysicalVariation[] dataTypePhysicalVariations = + vectorizedRowBatchCtx.getRowdataTypePhysicalVariations(); + + List<String> result = new ArrayList<String>(count); + final int end = startIndex + count; + for (int i = startIndex; i < end; i++) { + String displayString = rowColumnNames[i] + ":" + rowColumnTypeInfos[i]; + if (dataTypePhysicalVariations != null && + dataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) { + displayString += "/" + dataTypePhysicalVariations[i].toString(); + } + result.add(displayString); + } + return result; + } + + public static String getScratchColumns(VectorizedRowBatchCtx vectorizedRowBatchCtx) { + String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames(); + DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = + vectorizedRowBatchCtx.getScratchDataTypePhysicalVariations(); + final int size = scratchColumnTypeNames.length; + List<String> result = new ArrayList<String>(size); + for (int i = 0; i < size; i++) { + String displayString = scratchColumnTypeNames[i]; + if (scratchDataTypePhysicalVariations != null && + scratchDataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) { + displayString += "/" + scratchDataTypePhysicalVariations[i].toString(); + } + result.add(displayString); + } + return result.toString(); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public Boolean usesVectorUDFAdaptor() { if (!baseWork.getVectorMode()) { return null; @@ -336,71 +399,54 @@ public abstract class BaseWork extends AbstractOperatorDesc { this.vectorizedRowBatchCtx = vectorizedRowBatchCtx; } - private List<String> getColumns(int startIndex, int count) { - String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames(); - TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos(); - DataTypePhysicalVariation[] dataTypePhysicalVariations = - vectorizedRowBatchCtx.getRowdataTypePhysicalVariations(); - - List<String> result = new ArrayList<String>(count); - final int end = startIndex + count; - for (int i = startIndex; i < end; i++) { - String displayString = rowColumnNames[i] + ":" + rowColumnTypeInfos[i]; - if (dataTypePhysicalVariations != null && - dataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) { - displayString += "/" + dataTypePhysicalVariations[i].toString(); - } - result.add(displayString); - } - return result; - } - - @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getDataColumns() { - return getColumns(0, vectorizedRowBatchCtx.getDataColumnCount()); + return getColumns( + vectorizedRowBatchCtx, + 0, + vectorizedRowBatchCtx.getDataColumnCount()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List<String> getPartitionColumns() { - return getColumns(vectorizedRowBatchCtx.getDataColumnCount(), vectorizedRowBatchCtx.getPartitionColumnCount()); + return getColumns( + vectorizedRowBatchCtx, + vectorizedRowBatchCtx.getDataColumnCount(), + vectorizedRowBatchCtx.getPartitionColumnCount()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getDataColumnNums() { int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums(); if (dataColumnNums == null) { return null; } - return Arrays.toString(vectorizedRowBatchCtx.getDataColumnNums()); + return Arrays.toString(dataColumnNums); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public int getDataColumnCount() { return vectorizedRowBatchCtx.getDataColumnCount(); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public int getPartitionColumnCount() { return vectorizedRowBatchCtx.getPartitionColumnCount(); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getScratchColumnTypeNames() { - String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames(); - DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = vectorizedRowBatchCtx.getScratchDataTypePhysicalVariations(); - final int size = scratchColumnTypeNames.length; - List<String> result = new ArrayList<String>(size); - for (int i = 0; i < size; i++) { - String displayString = scratchColumnTypeNames[i]; - if (scratchDataTypePhysicalVariations != null && scratchDataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) { - displayString += "/" + scratchDataTypePhysicalVariations[i].toString(); - } - result.add(displayString); - } - return result.toString(); + return getScratchColumns(vectorizedRowBatchCtx); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "neededVirtualColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "neededVirtualColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public String getNeededVirtualColumns() { VirtualColumn[] neededVirtualColumns = vectorizedRowBatchCtx.getNeededVirtualColumns(); if (neededVirtualColumns == null || neededVirtualColumns.length == 0) { @@ -411,7 +457,8 @@ public abstract class BaseWork extends AbstractOperatorDesc { } - @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public RowBatchContextExplainVectorization vectorizedRowBatchContext() { if (!baseWork.getVectorMode()) { return null; http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index f147309..5bf0625 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -468,7 +468,9 @@ public class MapWork extends BaseWork { @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { - if (vectorMode) { + if (vectorMode && + !(getIsTestForcedVectorizationEnable() && + getIsTestVectorizationSuppressExplainExecutionMode())) { if (llapMode) { if (uberMode) { return "vectorized, uber"; http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index b0ae64a..24ce898 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -175,7 +175,7 @@ public class MergeJoinWork extends BaseWork { public boolean getLlapMode() { return getMainWork().getLlapMode(); } - + public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } http://git-wip-us.apache.org/repos/asf/hive/blob/1e884cc5/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java index ff5acbb..51298ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java @@ -126,15 +126,17 @@ public class ReduceWork extends BaseWork { @Explain(displayName = "Execution mode", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED }, vectorization = Vectorization.SUMMARY_PATH) public String getExecutionMode() { - if (vectorMode) { + if (vectorMode && + !(getIsTestForcedVectorizationEnable() && + getIsTestVectorizationSuppressExplainExecutionMode())) { if (llapMode) { - if (uberMode) { - return "vectorized, uber"; - } else { - return "vectorized, llap"; - } + if (uberMode) { + return "vectorized, uber"; + } else { + return "vectorized, llap"; + } } else { - return "vectorized"; + return "vectorized"; } } else if (llapMode) { return uberMode? "uber" : "llap";