Author: jpullokk Date: Thu Mar 26 23:24:27 2015 New Revision: 1669469 URL: http://svn.apache.org/r1669469 Log: HIVE-10069: CBO (Calcite Return Path): Ambiguous table name causes problem in field trimmer (Laljo John Pullokkaran)
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java?rev=1669469&r1=1669468&r2=1669469&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java Thu Mar 26 23:24:27 2015 @@ -569,12 +569,12 @@ public class HiveCalciteUtil { return deterministic; } - public static ImmutableMap<Integer, ColumnInfo> getColInfoMap(List<ColumnInfo> hiveCols, + public static <T> ImmutableMap<Integer, T> getColInfoMap(List<T> hiveCols, int startIndx) { - Builder<Integer, ColumnInfo> bldr = ImmutableMap.<Integer, ColumnInfo> builder(); + Builder<Integer, T> bldr = ImmutableMap.<Integer, T> builder(); int indx = startIndx; - for (ColumnInfo ci : hiveCols) { + for (T ci : hiveCols) { bldr.put(indx, ci); indx++; } @@ -615,6 +615,18 @@ public class HiveCalciteUtil { indx++; } + return bldr.build(); + } + + public static ImmutableMap<String, Integer> getRowColNameIndxMap(List<RelDataTypeField> rowFields) { + Builder<String, Integer> bldr = ImmutableMap.<String, Integer> builder(); + + int indx = 0; + for (RelDataTypeField rdt : rowFields) { + bldr.put(rdt.getName(), indx); + indx++; + } + return bldr.build(); } Modified: 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java?rev=1669469&r1=1669468&r2=1669469&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java Thu Mar 26 23:24:27 2015 @@ -454,4 +454,16 @@ public class RelOptHiveTable extends Rel public String getQBID() { return qbID; } + + public int getNoOfNonVirtualCols() { + return noOfNonVirtualCols; + } + + public Map<Integer, ColumnInfo> getPartColInfoMap() { + return hivePartitionColsMap; + } + + public Map<Integer, ColumnInfo> getNonPartColInfoMap() { + return hiveNonPartitionColsMap; + } } Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java?rev=1669469&r1=1669468&r2=1669469&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveTableScan.java Thu Mar 26 23:24:27 2015 @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.optimi import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.calcite.plan.RelOptCluster; @@ -37,6 +38,9 @@ import org.apache.hadoop.hive.ql.optimiz import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCost; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import 
com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableList.Builder; + /** * Relational expression representing a scan of a HiveDB collection. @@ -47,7 +51,9 @@ import org.apache.hadoop.hive.ql.plan.Co * </p> */ public class HiveTableScan extends TableScan implements HiveRelNode { - + private final RelDataType rowtype; + private final ImmutableList<Integer> neededColIndxsFrmReloptHT; + /** * Creates a HiveTableScan. * @@ -61,8 +67,15 @@ public class HiveTableScan extends Table * HiveDB table */ public HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table) { + this(cluster, traitSet, table, table.getRowType()); + } + + private HiveTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptHiveTable table, + RelDataType newRowtype) { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), table); assert getConvention() == HiveRelNode.CONVENTION; + this.rowtype = newRowtype; + this.neededColIndxsFrmReloptHT = buildNeededColIndxsFrmReloptHT(table.getRowType(), newRowtype); } @Override @@ -79,7 +92,12 @@ public class HiveTableScan extends Table * @return */ public HiveTableScan copy(RelDataType newRowtype) { - return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table).copy(newRowtype)); + return new HiveTableScan(getCluster(), getTraitSet(), ((RelOptHiveTable) table)); + } + + @Override + public RelDataType deriveRowType() { + return rowtype; } @Override @@ -137,4 +155,22 @@ public class HiveTableScan extends Table return newHT; } + + public List<Integer> getNeededColIndxsFrmReloptHT() { + return neededColIndxsFrmReloptHT; + } + + private static ImmutableList<Integer> buildNeededColIndxsFrmReloptHT(RelDataType htRowtype, + RelDataType scanRowType) { + Builder<Integer> neededColIndxsFrmReloptHTBldr = new ImmutableList.Builder<Integer>(); + Map<String, Integer> colNameToPosInReloptHT = HiveCalciteUtil.getRowColNameIndxMap(htRowtype + .getFieldList()); + List<String> colNamesInScanRowType = 
scanRowType.getFieldNames(); + + for (int i = 0; i < colNamesInScanRowType.size(); i++) { + neededColIndxsFrmReloptHTBldr.add(colNameToPosInReloptHT.get(colNamesInScanRowType.get(i))); + } + + return neededColIndxsFrmReloptHTBldr.build(); + } } \ No newline at end of file Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java?rev=1669469&r1=1669468&r2=1669469&view=diff ============================================================================== --- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java (original) +++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java Thu Mar 26 23:24:27 2015 @@ -35,7 +35,6 @@ import org.apache.calcite.rel.RelFieldCo import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.logical.LogicalExchange; -import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -192,48 +191,60 @@ public class HiveOpConverter { } RelOptHiveTable ht = (RelOptHiveTable) scanRel.getTable(); - Map<Integer, VirtualColumn> newVColMap = new HashMap<Integer, VirtualColumn>(); // 1. Setup TableScan Desc - // 1.1 Create TableScanDesc - String tableAlias = ht.getTableAlias(); + // 1.1 Build col details used by scan + ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>(); List<VirtualColumn> virtualCols = new ArrayList<VirtualColumn>(ht.getVirtualCols()); - TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD()); - - // 1.2. 
Set Partition cols in TSDesc - List<ColumnInfo> partColInfos = ht.getPartColumns(); + Map<Integer, VirtualColumn> hiveScanVColMap = new HashMap<Integer, VirtualColumn>(); List<String> partColNames = new ArrayList<String>(); - for (ColumnInfo ci : partColInfos) { - partColNames.add(ci.getInternalName()); - } - tsd.setPartColumns(partColNames); - - // 1.3. Set needed cols in TSDesc List<Integer> neededColumnIDs = new ArrayList<Integer>(); List<String> neededColumns = new ArrayList<String>(); - Map<String, Integer> colNameToIndxMap = HiveCalciteUtil.getColNameIndxMap(ht.getHiveTableMD() - .getCols()); - for (RelDataTypeField rdtf : scanRel.getRowType().getFieldList()) { - neededColumnIDs.add(colNameToIndxMap.get(rdtf.getName())); - neededColumns.add(rdtf.getName()); + + Map<Integer, VirtualColumn> posToVColMap = HiveCalciteUtil.getVColsMap(virtualCols, + ht.getNoOfNonVirtualCols()); + Map<Integer, ColumnInfo> posToPartColInfo = ht.getPartColInfoMap(); + Map<Integer, ColumnInfo> posToNonPartColInfo = ht.getNonPartColInfoMap(); + List<Integer> neededColIndxsFrmReloptHT = scanRel.getNeededColIndxsFrmReloptHT(); + List<String> scanColNames = scanRel.getRowType().getFieldNames(); + String tableAlias = ht.getTableAlias(); + + String colName; + ColumnInfo colInfo; + VirtualColumn vc; + Integer posInRHT; + + for (int i = 0; i < neededColIndxsFrmReloptHT.size(); i++) { + colName = scanColNames.get(i); + posInRHT = neededColIndxsFrmReloptHT.get(i); + if (posToVColMap.containsKey(posInRHT)) { + vc = posToVColMap.get(posInRHT); + virtualCols.add(vc); + colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden()); + hiveScanVColMap.put(i, vc); + } else if (posToPartColInfo.containsKey(posInRHT)) { + partColNames.add(colName); + colInfo = posToPartColInfo.get(posInRHT); + } else { + colInfo = posToNonPartColInfo.get(posInRHT); + } + neededColumnIDs.add(posInRHT); + neededColumns.add(colName); + colInfos.add(colInfo); } + + // 1.2 Create TableScanDesc 
+ TableScanDesc tsd = new TableScanDesc(tableAlias, virtualCols, ht.getHiveTableMD()); + + // 1.3. Set Partition cols in TSDesc + tsd.setPartColumns(partColNames); + + // 1.4. Set needed cols in TSDesc tsd.setNeededColumnIDs(neededColumnIDs); tsd.setNeededColumns(neededColumns); // 2. Setup TableScan - TableScanOperator ts = null; - // 2.1 Construct ordered colInfo list for TS RowSchema & update vcolMap - ArrayList<ColumnInfo> colInfos = new ArrayList<ColumnInfo>(ht.getNonPartColumns()); - colInfos.addAll(ht.getPartColumns()); - ColumnInfo ci; - for (VirtualColumn vc : virtualCols) { - ci = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true, vc.getIsHidden()); - colInfos.add(ci); - newVColMap.put(colInfos.size(), vc); - } - - // 2.2. Create TS OP - ts = (TableScanOperator) OperatorFactory.get(tsd, new RowSchema(colInfos)); + TableScanOperator ts = (TableScanOperator) OperatorFactory.get(tsd, new RowSchema(colInfos)); topOps.put(ht.getQBID(), ts); @@ -241,7 +252,7 @@ public class HiveOpConverter { LOG.debug("Generated " + ts + " with row schema: [" + ts.getSchema() + "]"); } - return new OpAttr(tableAlias, newVColMap, ts); + return new OpAttr(tableAlias, hiveScanVColMap, ts); } OpAttr visit(HiveProject projectRel) throws SemanticException { @@ -877,5 +888,4 @@ public class HiveOpConverter { return new Pair<ArrayList<ColumnInfo>, Map<Integer, VirtualColumn>>(colInfos, newVColMap); } - }