Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java?rev=1654355&r1=1654354&r2=1654355&view=diff
==============================================================================
--- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java (original)
+++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SortMergeJoinTaskDispatcher.java Fri Jan 23 19:59:11 2015
@@ -46,7 +46,6 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor;
 import org.apache.hadoop.hive.ql.parse.OpParseContext;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.QBJoinTree;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin;
 import org.apache.hadoop.hive.ql.plan.ConditionalResolverCommonJoin.ConditionalResolverCommonJoinCtx;
@@ -168,8 +167,7 @@ public class SortMergeJoinTaskDispatcher
   // create map join task and set big table as bigTablePosition
   private MapRedTask convertSMBTaskToMapJoinTask(MapredWork origWork,
       int bigTablePosition,
-      SMBMapJoinOperator smbJoinOp,
-      QBJoinTree joinTree)
+      SMBMapJoinOperator smbJoinOp)
       throws UnsupportedEncodingException, SemanticException {
     // deep copy a new mapred work
     MapredWork newWork = Utilities.clonePlan(origWork);
@@ -178,7 +176,7 @@ public class SortMergeJoinTaskDispatcher
         .getParseContext().getConf());
     // generate the map join operator; already checked the map join
     MapJoinOperator newMapJoinOp =
-        getMapJoinOperator(newTask, newWork, smbJoinOp, joinTree, bigTablePosition);
+        getMapJoinOperator(newTask, newWork, smbJoinOp, bigTablePosition);
     // The reducer needs to be restored - Consider a query like:
     // select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key;
@@ -246,7 +244,6 @@ public class SortMergeJoinTaskDispatcher
     // get parseCtx for this Join Operator
     ParseContext parseCtx = physicalContext.getParseContext();
-    QBJoinTree joinTree = parseCtx.getSmbMapJoinContext().get(originalSMBJoinOp);
     // Convert the work containing to sort-merge join into a work, as if it had a regular join.
     // Note that the operator tree is not changed - is still contains the SMB join, but the
@@ -257,9 +254,13 @@ public class SortMergeJoinTaskDispatcher
     SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork);
     currWork.getMapWork().setOpParseCtxMap(parseCtx.getOpParseCtx());
-    currWork.getMapWork().setJoinTree(joinTree);
+    currWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
+    currWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
+    currWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());
     currJoinWork.getMapWork().setOpParseCtxMap(parseCtx.getOpParseCtx());
-    currJoinWork.getMapWork().setJoinTree(joinTree);
+    currJoinWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin());
+    currJoinWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc());
+    currJoinWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases());
     // create conditional work list and task list
     List<Serializable> listWorks = new ArrayList<Serializable>();
@@ -296,7 +297,7 @@ public class SortMergeJoinTaskDispatcher
       // create map join task for the given big table position
       MapRedTask newTask = convertSMBTaskToMapJoinTask(
-          currJoinWork, bigTablePosition, newSMBJoinOp, joinTree);
+          currJoinWork, bigTablePosition, newSMBJoinOp);
       MapWork mapWork = newTask.getWork().getMapWork();
       Operator<?> parentOp = originalSMBJoinOp.getParentOperators().get(bigTablePosition);
@@ -334,7 +335,9 @@ public class SortMergeJoinTaskDispatcher
     listTasks.add(currTask);
     // clear JoinTree and OP Parse Context
     currWork.getMapWork().setOpParseCtxMap(null);
-    currWork.getMapWork().setJoinTree(null);
+    currWork.getMapWork().setLeftInputJoin(false);
+    currWork.getMapWork().setBaseSrc(null);
+    currWork.getMapWork().setMapAliases(null);
     // create conditional task and insert conditional task into task tree
     ConditionalWork cndWork = new ConditionalWork(listWorks);
@@ -353,7 +356,7 @@ public class SortMergeJoinTaskDispatcher
     cndTsk.setResolverCtx(resolverCtx);
     // replace the current task with the new generated conditional task
-    replaceTaskWithConditionalTask(currTask, cndTsk, physicalContext);
+    replaceTaskWithConditionalTask(currTask, cndTsk);
     return cndTsk;
   }
@@ -426,7 +429,6 @@ public class SortMergeJoinTaskDispatcher
   private MapJoinOperator getMapJoinOperator(MapRedTask task,
       MapredWork work,
       SMBMapJoinOperator oldSMBJoinOp,
-      QBJoinTree joinTree,
       int mapJoinPos) throws SemanticException {
     SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(task.getWork());
@@ -437,7 +439,6 @@ public class SortMergeJoinTaskDispatcher
     // generate the map join operator
     return MapJoinProcessor.convertSMBJoinToMapJoin(physicalContext.getConf(),
-        opParseContextMap, newSMBJoinOp,
-        joinTree, mapJoinPos, true);
+        opParseContextMap, newSMBJoinOp, mapJoinPos, true);
   }
 }
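
Taken together, the hunks above are one refactor: SortMergeJoinTaskDispatcher no longer fetches a QBJoinTree out of the ParseContext. The three pieces of join metadata the physical optimizer still needs — the left-input flag, the base sources, and the map aliases — now travel on the SMB join operator's own descriptor and are copied straight onto the MapWork. A minimal sketch of that copy step, assuming Hive's MapWork and SMBJoinDesc classes on the classpath (the class and helper names below are illustrative, not from the commit):

    import org.apache.hadoop.hive.ql.plan.MapWork;
    import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;

    public final class JoinInfoCopier {
      private JoinInfoCopier() {
      }

      // Mirrors the setter calls the commit makes on currWork and currJoinWork:
      // the metadata formerly held by QBJoinTree is read from the operator's
      // descriptor and stored on the work object that is serialized with the plan.
      public static void copyJoinInfo(SMBJoinDesc conf, MapWork work) {
        work.setLeftInputJoin(conf.isLeftInputJoin());
        work.setBaseSrc(conf.getBaseSrc());
        work.setMapAliases(conf.getMapAliases());
      }
    }

The same trio of setters is reset to false/null/null where the dispatcher previously called setJoinTree(null) to clear the context before building the conditional task.
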
Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Fri Jan 23 19:59:11 2015 @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.conf.HiveC import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.*; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.exec.spark.SparkTask; import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; @@ -74,6 +75,8 @@ import org.apache.hadoop.hive.ql.plan.Op import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceWork; import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; +import org.apache.hadoop.hive.ql.plan.SparkWork; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.TezWork; import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc; @@ -244,6 +247,7 @@ public class Vectorizer implements Physi supportedGenericUDFs.add(GenericUDFWhen.class); supportedGenericUDFs.add(GenericUDFCoalesce.class); supportedGenericUDFs.add(GenericUDFElt.class); + supportedGenericUDFs.add(GenericUDFInitCap.class); // For type casts supportedGenericUDFs.add(UDFToLong.class); @@ -309,6 +313,15 @@ public class Vectorizer implements Physi } } } + } else if (currTask instanceof SparkTask) { + SparkWork sparkWork = (SparkWork) currTask.getWork(); + for (BaseWork baseWork : sparkWork.getAllWork()) { + if (baseWork instanceof MapWork) { + convertMapWork((MapWork) baseWork, false); + } else if (baseWork instanceof ReduceWork) { + convertReduceWork((ReduceWork) baseWork); + } + } } return null; } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java Fri Jan 23 19:59:11 2015 @@ -28,6 +28,7 @@ import java.util.Map; import java.util.Stack; import org.apache.hadoop.hive.metastore.api.Index; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.index.bitmap.BitmapIndexHandler; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimiz import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; /** * @@ -115,12 +117,14 @@ 
public class IndexWhereTaskDispatcher im supportedIndexes.add(BitmapIndexHandler.class.getName()); // query the metastore to know what columns we have indexed - Collection<Table> topTables = pctx.getTopToTable().values(); Map<TableScanOperator, List<Index>> indexes = new HashMap<TableScanOperator, List<Index>>(); - for (Map.Entry<TableScanOperator, Table> entry : pctx.getTopToTable().entrySet()) { - List<Index> tblIndexes = IndexUtils.getIndexes(entry.getValue(), supportedIndexes); - if (tblIndexes.size() > 0) { - indexes.put(entry.getKey(), tblIndexes); + for (Operator<? extends OperatorDesc> op : pctx.getTopOps().values()) { + if (op instanceof TableScanOperator) { + List<Index> tblIndexes = IndexUtils.getIndexes(((TableScanOperator) op).getConf() + .getTableMetadata(), supportedIndexes); + if (tblIndexes.size() > 0) { + indexes.put((TableScanOperator) op, tblIndexes); + } } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java Fri Jan 23 19:59:11 2015 @@ -138,7 +138,7 @@ public class PartitionPruner implements */ public static PrunedPartitionList prune(TableScanOperator ts, ParseContext parseCtx, String alias) throws SemanticException { - return prune(parseCtx.getTopToTable().get(ts), parseCtx.getOpToPartPruner().get(ts), + return prune(ts.getConf().getTableMetadata(), parseCtx.getOpToPartPruner().get(ts), parseCtx.getConf(), alias, parseCtx.getPrunedPartitions()); } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Fri Jan 23 19:59:11 2015 @@ -103,7 +103,7 @@ public class StatsRulesProcFactory { AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; PrunedPartitionList partList = aspCtx.getParseContext().getPrunedPartitions(tsop.getName(), tsop); - Table table = aspCtx.getParseContext().getTopToTable().get(tsop); + Table table = tsop.getConf().getTableMetadata(); try { // gather statistics for the first time and the attach it to table scan operator Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java (original) +++ 
hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcFactory.java Fri Jan 23 19:59:11 2015 @@ -265,13 +265,20 @@ public final class UnionProcFactory { Operator<? extends OperatorDesc> operator = (Operator<? extends OperatorDesc>)stack.get(pos); + // (1) Because we have operator.supportUnionRemoveOptimization() for + // true only in SEL and FIL operators, + // this rule will actually only match UNION%(SEL%|FIL%)*FS% + // (2) The assumption here is that, if + // operator.getChildOperators().size() > 1, we are going to have + // multiple FS operators, i.e., multiple inserts. + // Current implementation does not support this. More details, please + // see HIVE-9217. + if (operator.getChildOperators() != null && operator.getChildOperators().size() > 1) { + return null; + } // Break if it encountered a union if (operator instanceof UnionOperator) { union = (UnionOperator)operator; - // No need for this optimization in case of multi-table inserts - if (union.getChildOperators().size() > 1) { - return null; - } break; } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/optimizer/unionproc/UnionProcessor.java Fri Jan 23 19:59:11 2015 @@ -96,7 +96,8 @@ public class UnionProcessor implements T // Walk the tree again to see if the union can be removed completely HiveConf conf = pCtx.getConf(); opRules.clear(); - if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE)) { + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE) + && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) { if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES)) { throw new Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java Fri Jan 23 19:59:11 2015 @@ -22,6 +22,8 @@ import java.lang.reflect.InvocationTarge import java.lang.reflect.UndeclaredThrowableException; import java.math.BigDecimal; import java.util.ArrayList; +import java.util.BitSet; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -66,9 +68,12 @@ import org.apache.calcite.rel.rules.Filt import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.ProjectRemoveRule; +import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; 
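
Before going deeper into CalcitePlanner, note that the IndexWhereTaskDispatcher, PartitionPruner, and StatsRulesProcFactory hunks earlier in this commit all make the same substitution — and the later ColumnAccessAnalyzer, GenTezUtils, and ProcessAnalyzeTable hunks repeat it: the Table that used to be looked up through ParseContext.getTopToTable() is now read from the TableScanOperator's own descriptor via getConf().getTableMetadata(). Code that still wants the old map view can rebuild it on demand from getTopOps(); a short sketch using only the accessors visible in this diff (class and method names are illustrative):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hive.ql.exec.Operator;
    import org.apache.hadoop.hive.ql.exec.TableScanOperator;
    import org.apache.hadoop.hive.ql.metadata.Table;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.plan.OperatorDesc;

    public final class TableScanTables {
      private TableScanTables() {
      }

      // Rebuilds the old topToTable view from state the operators already carry,
      // which is why ParseContext can drop the redundant map.
      public static Map<TableScanOperator, Table> byScan(ParseContext pctx) {
        Map<TableScanOperator, Table> result = new HashMap<TableScanOperator, Table>();
        for (Operator<? extends OperatorDesc> op : pctx.getTopOps().values()) {
          if (op instanceof TableScanOperator) {
            TableScanOperator ts = (TableScanOperator) op;
            result.put(ts, ts.getConf().getTableMetadata());
          }
        }
        return result;
      }
    }
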
+import org.apache.calcite.rel.rules.UnionMergeRule; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; @@ -106,15 +111,16 @@ import org.apache.hadoop.hive.ql.exec.Op import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; @@ -159,12 +165,12 @@ import org.apache.hadoop.hive.serde2.typ import com.google.common.base.Function; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; -import com.google.common.collect.ImmutableList.Builder; public class CalcitePlanner extends SemanticAnalyzer { - private AtomicInteger noColsMissingStats = new AtomicInteger(0); + private final AtomicInteger noColsMissingStats = new AtomicInteger(0); private List<FieldSchema> topLevelFieldSchema; private SemanticException semanticException; private boolean runCBO = true; @@ -187,6 +193,7 @@ public class CalcitePlanner extends Sema } } + @Override @SuppressWarnings("rawtypes") Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { Operator sinkOp = null; @@ -282,7 +289,7 @@ public class CalcitePlanner extends Sema /** * Can CBO handle the given AST? - * + * * @param ast * Top level AST * @param qb @@ -290,7 +297,7 @@ public class CalcitePlanner extends Sema * @param cboCtx * @param semAnalyzer * @return boolean - * + * * Assumption:<br> * If top level QB is query then everything below it must also be * Query. @@ -345,7 +352,7 @@ public class CalcitePlanner extends Sema /** * Checks whether Calcite can handle the query. - * + * * @param queryProperties * @param conf * @param topLevelQB @@ -354,7 +361,7 @@ public class CalcitePlanner extends Sema * Whether return value should be verbose in case of failure. * @return null if the query can be handled; non-null reason string if it * cannot be. - * + * * Assumption:<br> * 1. If top level QB is query then everything below it must also be * Query<br> @@ -548,7 +555,7 @@ public class CalcitePlanner extends Sema /** * Get Optimized AST for the given QB tree in the semAnalyzer. 
- * + * * @return Optimized operator tree translated in to Hive AST * @throws SemanticException */ @@ -572,7 +579,7 @@ public class CalcitePlanner extends Sema /*** * Unwraps Calcite Invocation exceptions coming meta data provider chain and * obtains the real cause. - * + * * @param Exception */ private void rethrowCalciteException(Exception e) throws SemanticException { @@ -651,7 +658,7 @@ public class CalcitePlanner extends Sema private class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> { private RelOptCluster cluster; private RelOptSchema relOptSchema; - private Map<String, PrunedPartitionList> partitionCache; + private final Map<String, PrunedPartitionList> partitionCache; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -706,6 +713,12 @@ public class CalcitePlanner extends Sema hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); + hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); + hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); + hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); @@ -736,7 +749,7 @@ public class CalcitePlanner extends Sema /** * Perform all optimizations before Join Ordering. - * + * * @param basePlan * original plan * @param mdProvider @@ -754,7 +767,11 @@ public class CalcitePlanner extends Sema SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); // 2. PPD - basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + basePlan = hepPlan(basePlan, true, mdProvider, + ReduceExpressionsRule.PROJECT_INSTANCE, + ReduceExpressionsRule.FILTER_INSTANCE, + ReduceExpressionsRule.JOIN_INSTANCE, + new HiveFilterProjectTransposeRule( Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( HiveFilter.DEFAULT_FILTER_FACTORY), @@ -787,7 +804,7 @@ public class CalcitePlanner extends Sema /** * Run the HEP Planner with the given rule set. - * + * * @param basePlan * @param followPlanChanges * @param mdProvider @@ -1057,7 +1074,7 @@ public class CalcitePlanner extends Sema /** * Generate Join Logical Plan Relnode by walking through the join AST. - * + * * @param qb * @param aliasToRel * Alias(Table/Relation alias) to RelNode; only read and not @@ -1276,7 +1293,7 @@ public class CalcitePlanner extends Sema Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException { /* * Handle Subquery predicates. - * + * * Notes (8/22/14 hb): Why is this a copy of the code from {@link * #genFilterPlan} - for now we will support the same behavior as non CBO * route. 
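
The hepPgmBldr hunk above registers five additional rewrite rules: constant folding over join, filter, and project expressions, removal of trivial projects, and merging of adjacent unions. For readers unfamiliar with Calcite's HEP driver, here is a generic, self-contained sketch of how such a rule program is built and run — stock Calcite API rather than Hive-specific code, shown with the same rule instances the commit uses:

    import org.apache.calcite.plan.hep.HepPlanner;
    import org.apache.calcite.plan.hep.HepProgram;
    import org.apache.calcite.plan.hep.HepProgramBuilder;
    import org.apache.calcite.rel.RelNode;
    import org.apache.calcite.rel.rules.ProjectRemoveRule;
    import org.apache.calcite.rel.rules.ReduceExpressionsRule;
    import org.apache.calcite.rel.rules.UnionMergeRule;

    public final class HepPassSketch {
      private HepPassSketch() {
      }

      // Builds a HEP program from the rules, then rewrites the plan rooted at
      // 'root' until the rules no longer fire.
      public static RelNode run(RelNode root) {
        HepProgramBuilder builder = new HepProgramBuilder();
        builder.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE);
        builder.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE);
        builder.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE);
        builder.addRuleInstance(ProjectRemoveRule.INSTANCE);
        builder.addRuleInstance(UnionMergeRule.INSTANCE);
        HepProgram program = builder.build();

        HepPlanner planner = new HepPlanner(program);
        planner.setRoot(root);
        return planner.findBestExp();
      }
    }

Note that the same ReduceExpressionsRule instances are also added to the pre-join-ordering hepPlan pass above, so expressions are constant-folded before predicate-pushdown decisions are made.
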
- but plan to allow nested SubQueries(Restriction.9.m) and @@ -1509,7 +1526,7 @@ public class CalcitePlanner extends Sema } private RelNode genGBRelNode(List<ExprNodeDesc> gbExprs, List<AggInfo> aggInfoLst, - RelNode srcRel) throws SemanticException { + List<Integer> groupSets, RelNode srcRel) throws SemanticException { ImmutableMap<String, Integer> posMap = this.relToHiveColNameCalcitePosMap.get(srcRel); RexNodeConverter converter = new RexNodeConverter(this.cluster, srcRel.getRowType(), posMap, 0, false); @@ -1541,10 +1558,25 @@ public class CalcitePlanner extends Sema } RelNode gbInputRel = HiveProject.create(srcRel, gbChildProjLst, null); + // Grouping sets: we need to transform them into ImmutableBitSet + // objects for Calcite + List<ImmutableBitSet> transformedGroupSets = null; + if(groupSets != null && !groupSets.isEmpty()) { + Set<ImmutableBitSet> setTransformedGroupSets = + new HashSet<ImmutableBitSet>(groupSets.size()); + for(int val: groupSets) { + setTransformedGroupSets.add(convert(val)); + } + // Calcite expects the grouping sets sorted and without duplicates + transformedGroupSets = new ArrayList<ImmutableBitSet>(setTransformedGroupSets); + Collections.sort(transformedGroupSets, ImmutableBitSet.COMPARATOR); + } + HiveRelNode aggregateRel = null; try { aggregateRel = new HiveAggregate(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), - gbInputRel, false, groupSet, null, aggregateCalls); + gbInputRel, (transformedGroupSets!=null ? true:false), groupSet, + transformedGroupSets, aggregateCalls); } catch (InvalidRelException e) { throw new SemanticException(e); } @@ -1552,6 +1584,19 @@ public class CalcitePlanner extends Sema return aggregateRel; } + private ImmutableBitSet convert(int value) { + BitSet bits = new BitSet(); + int index = 0; + while (value != 0L) { + if (value % 2 != 0) { + bits.set(index); + } + ++index; + value = value >>> 1; + } + return ImmutableBitSet.FROM_BIT_SET.apply(bits); + } + private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, RowResolver gByInputRR, RowResolver gByRR) { if (gByExpr.getType() == HiveParser.DOT @@ -1676,7 +1721,7 @@ public class CalcitePlanner extends Sema /** * Generate GB plan. - * + * * @param qb * @param srcRel * @return TODO: 1. Grouping Sets (roll up..) @@ -1686,29 +1731,26 @@ public class CalcitePlanner extends Sema RelNode gbRel = null; QBParseInfo qbp = getQBParseInfo(qb); - // 0. for GSets, Cube, Rollup, bail from Calcite path. - if (!qbp.getDestRollups().isEmpty() || !qbp.getDestGroupingSets().isEmpty() - || !qbp.getDestCubes().isEmpty()) { - String gbyClause = null; - HashMap<String, ASTNode> gbysMap = qbp.getDestToGroupBy(); - if (gbysMap.size() == 1) { - ASTNode gbyAST = gbysMap.entrySet().iterator().next().getValue(); - gbyClause = ctx.getTokenRewriteStream().toString(gbyAST.getTokenStartIndex(), - gbyAST.getTokenStopIndex()); - gbyClause = "in '" + gbyClause + "'."; - } else { - gbyClause = "."; - } - String msg = String.format("Encountered Grouping Set/Cube/Rollup%s" - + " Currently we don't support Grouping Set/Cube/Rollup" + " clauses in CBO," - + " turn off cbo for these queries.", gbyClause); - LOG.debug(msg); - throw new CalciteSemanticException(msg); - } - // 1. Gather GB Expressions (AST) (GB + Aggregations) // NOTE: Multi Insert is not supported String detsClauseName = qbp.getClauseNames().iterator().next(); + // Check and transform group by *. This will only happen for select distinct *. + // Here the "genSelectPlan" is being leveraged. 
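
Stepping back within this same file: genGBRelNode now accepts the grouping-set masks and turns each integer mask into a Calcite ImmutableBitSet through the new convert(int) helper — each set bit i of the mask maps to group-by key i of the grouping set, and the bits are carried across to Calcite unchanged. An equivalent, JDK-only version of that conversion (class and method names are illustrative; e.g. mask 5 = 0b101 selects keys 0 and 2):

    import java.util.BitSet;

    public final class GroupingSetMask {
      private GroupingSetMask() {
      }

      // Same loop as the commit's convert(int): walk the mask's binary
      // representation and record the position of every set bit.
      static BitSet toBitSet(int mask) {
        BitSet bits = new BitSet();
        for (int index = 0; mask != 0; mask >>>= 1, index++) {
          if ((mask & 1) != 0) {
            bits.set(index);
          }
        }
        return bits;
      }

      public static void main(String[] args) {
        System.out.println(toBitSet(5)); // {0, 2}
        System.out.println(toBitSet(3)); // {0, 1}
      }
    }

The commit then de-duplicates the resulting bit sets through a HashSet and sorts them with ImmutableBitSet.COMPARATOR, since Calcite expects grouping sets sorted and without duplicates.
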
+ // The main benefits are (1) remove virtual columns that should + // not be included in the group by; (2) add the fully qualified column names to unParseTranslator + // so that view is supported. The drawback is that an additional SEL op is added. If it is + // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match + // SEL%SEL% rule. + ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName); + if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI + && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) { + ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0); + if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) { + srcRel = genSelectLogicalPlan(qb, srcRel, srcRel); + RowResolver rr = this.relToHiveRR.get(srcRel); + qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr)); + } + } List<ASTNode> grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); HashMap<String, ASTNode> aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; @@ -1739,18 +1781,34 @@ public class CalcitePlanner extends Sema } } - // 4. Construct aggregation function Info + // 4. GroupingSets, Cube, Rollup + int groupingColsSize = gbExprNDescLst.size(); + List<Integer> groupingSets = null; + if (!qbp.getDestRollups().isEmpty() + || !qbp.getDestGroupingSets().isEmpty() + || !qbp.getDestCubes().isEmpty()) { + if (qbp.getDestRollups().contains(detsClauseName)) { + groupingSets = getGroupingSetsForRollup(grpByAstExprs.size()); + } else if (qbp.getDestCubes().contains(detsClauseName)) { + groupingSets = getGroupingSetsForCube(grpByAstExprs.size()); + } else if (qbp.getDestGroupingSets().contains(detsClauseName)) { + groupingSets = getGroupingSets(grpByAstExprs, qbp, detsClauseName); + } + groupingColsSize = groupingColsSize * 2; + } + + // 5. 
Construct aggregation function Info ArrayList<AggInfo> aggregations = new ArrayList<AggInfo>(); if (hasAggregationTrees) { assert (aggregationTrees != null); for (ASTNode value : aggregationTrees.values()) { - // 4.1 Determine type of UDAF + // 5.1 Determine type of UDAF // This is the GenericUDAF name String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; - // 4.2 Convert UDAF Params to ExprNodeDesc + // 5.2 Convert UDAF Params to ExprNodeDesc ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>(); for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); @@ -1767,18 +1825,62 @@ public class CalcitePlanner extends Sema aggParameters); AggInfo aInfo = new AggInfo(aggParameters, udaf.returnType, aggName, isDistinct); aggregations.add(aInfo); - String field = SemanticAnalyzer.getColumnInternalName(gbExprNDescLst.size() - + aggregations.size() - 1); + String field = getColumnInternalName(groupingColsSize + aggregations.size() - 1); outputColumnNames.add(field); groupByOutputRowResolver.putExpression(value, new ColumnInfo(field, aInfo.m_returnType, "", false)); } } - gbRel = genGBRelNode(gbExprNDescLst, aggregations, srcRel); + gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); this.relToHiveRR.put(gbRel, groupByOutputRowResolver); + + // 6. If GroupingSets, Cube, Rollup were used, we account grouping__id. + // Further, we insert a project operator on top to remove the grouping + // boolean associated to each column in Calcite; this will avoid + // recalculating all column positions when we go back from Calcite to Hive + if(groupingSets != null && !groupingSets.isEmpty()) { + RowResolver selectOutputRowResolver = new RowResolver(); + selectOutputRowResolver.setIsExprResolver(true); + RowResolver.add(selectOutputRowResolver, groupByOutputRowResolver); + outputColumnNames = new ArrayList<String>(outputColumnNames); + + // 6.1 List of columns to keep from groupBy operator + List<RelDataTypeField> gbOutput = gbRel.getRowType().getFieldList(); + List<RexNode> calciteColLst = new ArrayList<RexNode>(); + for(RelDataTypeField gbOut: gbOutput) { + if(gbOut.getIndex() < gbExprNDescLst.size() || + gbOut.getIndex() >= gbExprNDescLst.size() * 2) { + calciteColLst.add(new RexInputRef(gbOut.getIndex(), gbOut.getType())); + } + } + + // 6.2 Add column for grouping_id function + String field = getColumnInternalName(groupingColsSize + aggregations.size()); + outputColumnNames.add(field); + selectOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), + new ColumnInfo( + field, + TypeInfoFactory.stringTypeInfo, + null, + true)); + + // 6.3 Compute column for grouping_id function in Calcite + List<RexNode> identifierCols = new ArrayList<RexNode>(); + for(int i = gbExprNDescLst.size(); i < gbExprNDescLst.size() * 2; i++) { + identifierCols.add(new RexInputRef( + i, gbOutput.get(i).getType())); + } + final RexBuilder rexBuilder = cluster.getRexBuilder(); + RexNode groupingID = rexBuilder.makeCall(HiveGroupingID.GROUPING__ID, + identifierCols); + calciteColLst.add(groupingID); + + // Create select + gbRel = this.genSelectRelNode(calciteColLst, selectOutputRowResolver, gbRel); + } } return gbRel; @@ -1788,7 +1890,7 @@ public class CalcitePlanner extends Sema * 
Generate OB RelNode and input Select RelNode that should be used to * introduce top constraining Project. If Input select RelNode is not * present then don't introduce top constraining select. - * + * * @param qb * @param srcRel * @param outermostOB @@ -2198,7 +2300,7 @@ public class CalcitePlanner extends Sema /** * NOTE: there can only be one select caluse since we don't handle multi * destination insert. - * + * * @throws SemanticException */ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessAnalyzer.java Fri Jan 23 19:59:11 2015 @@ -17,16 +17,20 @@ */ package org.apache.hadoop.hive.ql.parse; +import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; public class ColumnAccessAnalyzer { - private static final Log LOG = LogFactory.getLog(ColumnAccessAnalyzer.class.getName()); + private static final Log LOG = LogFactory.getLog(ColumnAccessAnalyzer.class.getName()); private final ParseContext pGraphContext; public ColumnAccessAnalyzer() { @@ -39,19 +43,22 @@ public class ColumnAccessAnalyzer { public ColumnAccessInfo analyzeColumnAccess() throws SemanticException { ColumnAccessInfo columnAccessInfo = new ColumnAccessInfo(); - Map<TableScanOperator, Table> topOps = pGraphContext.getTopToTable(); - for (TableScanOperator op : topOps.keySet()) { - Table table = topOps.get(op); - String tableName = table.getCompleteName(); - List<String> referenced = op.getReferencedColumns(); - for (String column : referenced) { - columnAccessInfo.add(tableName, column); - } - if (table.isPartitioned()) { - PrunedPartitionList parts = pGraphContext.getPrunedPartitions(table.getTableName(), op); - if (parts.getReferredPartCols() != null) { - for (String partKey : parts.getReferredPartCols()) { - columnAccessInfo.add(tableName, partKey); + Collection<Operator<? extends OperatorDesc>> topOps = pGraphContext.getTopOps().values(); + for (Operator<? 
extends OperatorDesc> op : topOps) { + if (op instanceof TableScanOperator) { + TableScanOperator top = (TableScanOperator) op; + Table table = top.getConf().getTableMetadata(); + String tableName = table.getCompleteName(); + List<String> referenced = top.getReferencedColumns(); + for (String column : referenced) { + columnAccessInfo.add(tableName, column); + } + if (table.isPartitioned()) { + PrunedPartitionList parts = pGraphContext.getPrunedPartitions(table.getTableName(), top); + if (parts.getReferredPartCols() != null) { + for (String partKey : parts.getReferredPartCols()) { + columnAccessInfo.add(tableName, partKey); + } } } } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/EximUtil.java Fri Jan 23 19:59:11 2015 @@ -47,6 +47,8 @@ import org.apache.thrift.TDeserializer; import org.apache.thrift.TException; import org.apache.thrift.TSerializer; import org.apache.thrift.protocol.TJSONProtocol; +import org.codehaus.jackson.JsonFactory; +import org.codehaus.jackson.JsonGenerator; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; @@ -168,37 +170,37 @@ public class EximUtil { public static final String METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION = null; public static void createExportDump(FileSystem fs, Path metadataPath, org.apache.hadoop.hive.ql.metadata.Table tableHandle, - List<org.apache.hadoop.hive.ql.metadata.Partition> partitions) throws SemanticException, IOException { + Iterable<org.apache.hadoop.hive.ql.metadata.Partition> partitions) throws SemanticException, IOException { + OutputStream out = fs.create(metadataPath); + JsonGenerator jgen = (new JsonFactory()).createJsonGenerator(out); + jgen.writeStartObject(); + jgen.writeStringField("version",METADATA_FORMAT_VERSION); + if (METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION != null) { + jgen.writeStringField("fcversion",METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION); + } + TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); try { - JSONObject jsonContainer = new JSONObject(); - jsonContainer.put("version", METADATA_FORMAT_VERSION); - if (METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION != null) { - jsonContainer.put("fcversion", METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION); - } - TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); - try { - String tableDesc = serializer.toString(tableHandle.getTTable(), "UTF-8"); - jsonContainer.put("table", tableDesc); - JSONArray jsonPartitions = new JSONArray(); - if (partitions != null) { - for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) { - String partDesc = serializer.toString(partition.getTPartition(), "UTF-8"); - jsonPartitions.put(partDesc); - } + jgen.writeStringField("table", serializer.toString(tableHandle.getTTable(), "UTF-8")); + jgen.writeFieldName("partitions"); + jgen.writeStartArray(); + if (partitions != null) { + for (org.apache.hadoop.hive.ql.metadata.Partition partition : partitions) { + jgen.writeString(serializer.toString(partition.getTPartition(), "UTF-8")); + jgen.flush(); } - jsonContainer.put("partitions", jsonPartitions); - } catch (TException e) { - throw new 
SemanticException( - ErrorMsg.GENERIC_ERROR - .getMsg("Exception while serializing the metastore objects"), e); } - OutputStream out = fs.create(metadataPath); - out.write(jsonContainer.toString().getBytes("UTF-8")); - out.close(); - - } catch (JSONException e) { - throw new SemanticException(ErrorMsg.GENERIC_ERROR.getMsg("Error in serializing metadata"), e); + jgen.writeEndArray(); + } catch (TException e) { + throw new SemanticException( + ErrorMsg.GENERIC_ERROR + .getMsg("Exception while serializing the metastore objects"), e); } + jgen.writeEndObject(); + jgen.close(); // JsonGenerator owns the OutputStream, so it closes it when we call close. + } + + private static void write(OutputStream out, String s) throws IOException { + out.write(s.getBytes("UTF-8")); } public static Map.Entry<Table, List<Partition>> Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java Fri Jan 23 19:59:11 2015 @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.parse; +import org.apache.hadoop.hive.ql.metadata.PartitionIterable; + import java.io.FileNotFoundException; import java.io.IOException; import java.io.Serializable; @@ -28,6 +30,7 @@ import org.antlr.runtime.tree.Tree; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.exec.Task; @@ -68,7 +71,7 @@ public class ExportSemanticAnalyzer exte throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not a directory : " + toURI)); } else { - FileStatus[] files = fs.listStatus(toPath); + FileStatus[] files = fs.listStatus(toPath, FileUtils.HIDDEN_FILES_PATH_FILTER); if (files != null && files.length != 0) { throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast, "Target is not an empty directory : " + toURI)); @@ -80,11 +83,13 @@ public class ExportSemanticAnalyzer exte throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(ast), e); } - List<Partition> partitions = null; + PartitionIterable partitions = null; try { - partitions = null; if (ts.tableHandle.isPartitioned()) { - partitions = (ts.partitions != null) ? ts.partitions : db.getPartitions(ts.tableHandle); + partitions = (ts.partitions != null) ? 
+ new PartitionIterable(ts.partitions) : + new PartitionIterable(db,ts.tableHandle,null,conf.getIntVar( + HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); } Path path = new Path(ctx.getLocalTmpPath(), "_metadata"); EximUtil.createExportDump(FileSystem.getLocal(conf), path, ts.tableHandle, partitions); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java Fri Jan 23 19:59:11 2015 @@ -191,10 +191,7 @@ public class GenTezUtils { setupMapWork(mapWork, context, partitions, root, alias); - if (context.parseContext != null - && context.parseContext.getTopToTable() != null - && context.parseContext.getTopToTable().containsKey(ts) - && context.parseContext.getTopToTable().get(ts).isDummyTable()) { + if (ts.getConf().getTableMetadata() != null && ts.getConf().getTableMetadata().isDummyTable()) { mapWork.setDummyTableScan(true); } Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Jan 23 19:59:11 2015 @@ -823,6 +823,8 @@ createTableStatement @after { popMsg(state); } : KW_CREATE (temp=KW_TEMPORARY)? (ext=KW_EXTERNAL)? KW_TABLE ifNotExists? name=tableName ( like=KW_LIKE likeName=tableName + tableRowFormat? + tableFileFormat? tableLocation? tablePropertiesPrefixed? | (LPAREN columnNameTypeList RPAREN)? @@ -2214,7 +2216,7 @@ insertClause @after { popMsg(state); } : KW_INSERT KW_OVERWRITE destination ifNotExists? -> ^(TOK_DESTINATION destination ifNotExists?) - | KW_INSERT KW_INTO KW_TABLE tableOrPartition + | KW_INSERT KW_INTO KW_TABLE? 
tableOrPartition -> ^(TOK_INSERT_INTO tableOrPartition) ; Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java Fri Jan 23 19:59:11 2015 @@ -35,6 +35,7 @@ import org.apache.commons.lang.ObjectUti import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -357,7 +358,7 @@ public class ImportSemanticAnalyzer exte throws IOException, SemanticException { LOG.debug("checking emptiness of " + targetPath.toString()); if (fs.exists(targetPath)) { - FileStatus[] status = fs.listStatus(targetPath); + FileStatus[] status = fs.listStatus(targetPath, FileUtils.HIDDEN_FILES_PATH_FILTER); if (status.length > 0) { LOG.debug("Files inc. " + status[0].getPath().toString() + " found in path : " + targetPath.toString()); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Fri Jan 23 19:59:11 2015 @@ -926,18 +926,17 @@ public class PTFTranslator { */ for (ColumnInfo inpCInfo : inputRR.getColumnInfos()) { ColumnInfo cInfo = new ColumnInfo(inpCInfo); - String colAlias = cInfo.getAlias(); - String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName()); - if (tabColAlias != null) { - colAlias = tabColAlias[1]; - } - ASTNode inExpr = null; - inExpr = PTFTranslator.getASTNode(inpCInfo, inpRR); + ASTNode inExpr = PTFTranslator.getASTNode(inpCInfo, inpRR); if (inExpr != null) { rr.putExpression(inExpr, cInfo); } else { - rr.put(cInfo.getTabAlias(), colAlias, cInfo); + String[] tabColAlias = inputRR.reverseLookup(inpCInfo.getInternalName()); + if (tabColAlias != null) { + rr.put(tabColAlias[0], tabColAlias[1], cInfo); + } else { + rr.put(inpCInfo.getTabAlias(), inpCInfo.getAlias(), cInfo); + } } String[] altMapping = inputRR.getAlternateMappings(inpCInfo.getInternalName()); Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Fri Jan 23 19:59:11 2015 @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.Context import org.apache.hadoop.hive.ql.QueryProperties; import 
org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator; import org.apache.hadoop.hive.ql.exec.FetchTask; -import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; @@ -43,8 +42,6 @@ import org.apache.hadoop.hive.ql.exec.Ta import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.LineageInfo; import org.apache.hadoop.hive.ql.hooks.ReadEntity; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -73,15 +70,11 @@ public class ParseContext { private HashMap<TableScanOperator, sampleDesc> opToSamplePruner; private Map<TableScanOperator, Map<String, ExprNodeDesc>> opToPartToSkewedPruner; private HashMap<String, Operator<? extends OperatorDesc>> topOps; - private HashMap<String, Operator<? extends OperatorDesc>> topSelOps; private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx; - private Map<JoinOperator, QBJoinTree> joinContext; - private Map<MapJoinOperator, QBJoinTree> mapJoinContext; - private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext; - private HashMap<TableScanOperator, Table> topToTable; - private Map<FileSinkOperator, Table> fsopToTable; + private Set<JoinOperator> joinOps; + private Set<MapJoinOperator> mapJoinOps; + private Set<SMBMapJoinOperator> smbMapJoinOps; private List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting; - private HashMap<TableScanOperator, Map<String, String>> topToProps; private HashMap<String, SplitSample> nameToSplitSample; private List<LoadTableDesc> loadTableWork; private List<LoadFileDesc> loadFileWork; @@ -128,15 +121,11 @@ public class ParseContext { * @param opToPartList * @param topOps * list of operators for the top query - * @param topSelOps - * list of operators for the selects introduced for column pruning * @param opParseCtx * operator parse context - contains a mapping from operator to * operator parse state (row resolver etc.) - * @param joinContext + * @param joinOps * context needed join processing (map join specifically) - * @param topToTable - * the top tables being processed * @param loadTableWork * list of destination tables being loaded * @param loadFileWork @@ -163,13 +152,9 @@ public class ParseContext { HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner, HashMap<TableScanOperator, PrunedPartitionList> opToPartList, HashMap<String, Operator<? extends OperatorDesc>> topOps, - HashMap<String, Operator<? extends OperatorDesc>> topSelOps, LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx, - Map<JoinOperator, QBJoinTree> joinContext, - Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext, - HashMap<TableScanOperator, Table> topToTable, - HashMap<TableScanOperator, Map<String, String>> topToProps, - Map<FileSinkOperator, Table> fsopToTable, + Set<JoinOperator> joinOps, + Set<SMBMapJoinOperator> smbMapJoinOps, List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork, Context ctx, HashMap<String, String> idToTableNameMap, int destTableId, UnionProcContext uCtx, List<AbstractMapJoinOperator<? 
extends MapJoinDesc>> listMapJoinOpsNoReducer, @@ -188,16 +173,12 @@ public class ParseContext { this.ast = ast; this.opToPartPruner = opToPartPruner; this.opToPartList = opToPartList; - this.joinContext = joinContext; - this.smbMapJoinContext = smbMapJoinContext; - this.topToTable = topToTable; - this.fsopToTable = fsopToTable; - this.topToProps = topToProps; + this.joinOps = joinOps; + this.smbMapJoinOps = smbMapJoinOps; this.loadFileWork = loadFileWork; this.loadTableWork = loadTableWork; this.opParseCtx = opParseCtx; this.topOps = topOps; - this.topSelOps = topSelOps; this.ctx = ctx; this.idToTableNameMap = idToTableNameMap; this.destTableId = destTableId; @@ -297,29 +278,6 @@ public class ParseContext { return opToPartList; } - /** - * @return the topToTable - */ - public HashMap<TableScanOperator, Table> getTopToTable() { - return topToTable; - } - - /** - * @param topToTable - * the topToTable to set - */ - public void setTopToTable(HashMap<TableScanOperator, Table> topToTable) { - this.topToTable = topToTable; - } - - public Map<FileSinkOperator, Table> getFsopToTable() { - return fsopToTable; - } - - public void setFsopToTable(Map<FileSinkOperator, Table> fsopToTable) { - this.fsopToTable = fsopToTable; - } - public List<ReduceSinkOperator> getReduceSinkOperatorsAddedByEnforceBucketingSorting() { return reduceSinkOperatorsAddedByEnforceBucketingSorting; } @@ -331,21 +289,6 @@ public class ParseContext { } /** - * @return the topToProps - */ - public HashMap<TableScanOperator, Map<String, String>> getTopToProps() { - return topToProps; - } - - /** - * @param topToProps - * the topToProps to set - */ - public void setTopToProps(HashMap<TableScanOperator, Map<String, String>> topToProps) { - this.topToProps = topToProps; - } - - /** * @return the topOps */ public HashMap<String, Operator<? extends OperatorDesc>> getTopOps() { @@ -361,22 +304,6 @@ public class ParseContext { } /** - * @return the topSelOps - */ - public HashMap<String, Operator<? extends OperatorDesc>> getTopSelOps() { - return topSelOps; - } - - /** - * @param topSelOps - * the topSelOps to set - */ - public void setTopSelOps( - HashMap<String, Operator<? extends OperatorDesc>> topSelOps) { - this.topSelOps = topSelOps; - } - - /** * @return the opParseCtx */ public LinkedHashMap<Operator<? 
extends OperatorDesc>, OpParseContext> getOpParseCtx() { @@ -476,18 +403,18 @@ public class ParseContext { } /** - * @return the joinContext + * @return the joinOps */ - public Map<JoinOperator, QBJoinTree> getJoinContext() { - return joinContext; + public Set<JoinOperator> getJoinOps() { + return joinOps; } /** - * @param joinContext - * the joinContext to set + * @param joinOps + * the joinOps to set */ - public void setJoinContext(Map<JoinOperator, QBJoinTree> joinContext) { - this.joinContext = joinContext; + public void setJoinOps(Set<JoinOperator> joinOps) { + this.joinOps = joinOps; } /** @@ -570,20 +497,20 @@ public class ParseContext { return lInfo; } - public Map<MapJoinOperator, QBJoinTree> getMapJoinContext() { - return mapJoinContext; + public Set<MapJoinOperator> getMapJoinOps() { + return mapJoinOps; } - public void setMapJoinContext(Map<MapJoinOperator, QBJoinTree> mapJoinContext) { - this.mapJoinContext = mapJoinContext; + public void setMapJoinOps(Set<MapJoinOperator> mapJoinOps) { + this.mapJoinOps = mapJoinOps; } - public Map<SMBMapJoinOperator, QBJoinTree> getSmbMapJoinContext() { - return smbMapJoinContext; + public Set<SMBMapJoinOperator> getSmbMapJoinOps() { + return smbMapJoinOps; } - public void setSmbMapJoinContext(Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext) { - this.smbMapJoinContext = smbMapJoinContext; + public void setSmbMapJoinOps(Set<SMBMapJoinOperator> smbMapJoinOps) { + this.smbMapJoinOps = smbMapJoinOps; } public GlobalLimitCtx getGlobalLimitCtx() { Modified: hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java URL: http://svn.apache.org/viewvc/hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java?rev=1654355&r1=1654354&r2=1654355&view=diff ============================================================================== --- hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java (original) +++ hive/branches/llap/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java Fri Jan 23 19:59:11 2015 @@ -75,7 +75,7 @@ public class ProcessAnalyzeTable impleme TableScanOperator tableScan = (TableScanOperator) nd; ParseContext parseContext = context.parseContext; - Class<? extends InputFormat> inputFormat = parseContext.getTopToTable().get(tableScan) + Class<? extends InputFormat> inputFormat = tableScan.getConf().getTableMetadata() .getInputFormatClass(); QB queryBlock = parseContext.getQB(); QBParseInfo parseInfo = parseContext.getQB().getParseInfo();
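
Finally, note the shape of the EximUtil change above: createExportDump no longer accumulates a JSONObject — and with it every serialized partition — in memory before writing. It streams the dump through Jackson's JsonGenerator, flushing after each partition, which keeps memory flat for heavily partitioned tables; that is also why the parameter widens from List to Iterable and ExportSemanticAnalyzer can hand in a lazily batched PartitionIterable. A minimal, self-contained sketch of the streaming pattern, assuming Jackson 1.x (org.codehaus.jackson) on the classpath; the field values are stand-ins:

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;

    import org.codehaus.jackson.JsonFactory;
    import org.codehaus.jackson.JsonGenerator;

    public final class StreamingDumpSketch {
      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        JsonGenerator jgen = new JsonFactory().createJsonGenerator(out);
        jgen.writeStartObject();
        jgen.writeStringField("version", "0.1");
        jgen.writeFieldName("partitions");
        jgen.writeStartArray();
        for (String partDesc : new String[] { "part-1", "part-2" }) {
          jgen.writeString(partDesc); // one serialized partition at a time
          jgen.flush();               // push it out; nothing accumulates in memory
        }
        jgen.writeEndArray();
        jgen.writeEndObject();
        jgen.close(); // also closes the underlying stream, as the commit's comment notes
        System.out.println(out.toString("UTF-8"));
      }
    }
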