Repository: hive Updated Branches: refs/heads/master 0b38612f6 -> 6df52edc5
HIVE-11437: CBO: Calcite Operator To Hive Operator (Calcite Return Path) : dealing with insert into (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6df52edc Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6df52edc Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6df52edc Branch: refs/heads/master Commit: 6df52edc5ec7dea80b271897128d5037d2d90ef0 Parents: 0b38612 Author: Pengcheng Xiong <pxi...@hortonworks.com> Authored: Mon Aug 10 13:00:59 2015 +0300 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Mon Aug 10 13:00:59 2015 +0300 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 40 ++++++++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 2 +- .../test/queries/clientpositive/cbo_rp_insert.q | 17 ++++ .../results/clientpositive/cbo_rp_insert.q.out | 89 ++++++++++++++++++++ 4 files changed, 146 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 4027229..f26d1df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -109,6 +109,8 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; @@ -170,6 +172,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; import org.apache.hadoop.hive.serde.serdeConstants; @@ -649,7 +652,42 @@ public class CalcitePlanner extends SemanticAnalyzer { conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan); RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB()); opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR)); - return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot); + String dest = getQB().getParseInfo().getClauseNames().iterator().next(); + if (getQB().getParseInfo().getDestSchemaForClause(dest) != null + && this.getQB().getTableDesc() == null) { + Operator<?> selOp = handleInsertStatement(dest, hiveRoot, hiveRootRR, getQB()); + return genFileSinkPlan(dest, getQB(), selOp); + } else { + return genFileSinkPlan(dest, getQB(), hiveRoot); + } + } + + // This function serves as the wrapper of handleInsertStatementSpec in + // SemanticAnalyzer + Operator<?> handleInsertStatement(String dest, Operator<?> input, RowResolver inputRR, QB qb) + throws SemanticException { + ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>(); + ArrayList<ColumnInfo> columns = inputRR.getColumnInfos(); + for (int i = 0; i < columns.size(); i++) { + ColumnInfo col = columns.get(i); + colList.add(new ExprNodeColumnDesc(col)); + } + ASTNode selExprList = qb.getParseInfo().getSelForClause(dest); + + RowResolver out_rwsch = handleInsertStatementSpec(colList, dest, inputRR, inputRR, qb, + selExprList); + + ArrayList<String> columnNames = new ArrayList<String>(); + Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>(); + for (int i = 0; i < colList.size(); i++) { + String outputCol = getColumnInternalName(i); + colExprMap.put(outputCol, colList.get(i)); + columnNames.add(outputCol); + } + Operator<?> output = putOpInsertMap(OperatorFactory.getAndMakeChild(new SelectDesc(colList, + columnNames), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); + output.setColumnExprMap(colExprMap); + return output; } private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan) http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index fe7c1ca..5ea6f3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -3883,7 +3883,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * @see #handleInsertStatementSpecPhase1(ASTNode, QBParseInfo, org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx) * @throws SemanticException */ - private RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, + public RowResolver handleInsertStatementSpec(List<ExprNodeDesc> col_list, String dest, RowResolver outputRR, RowResolver inputRR, QB qb, ASTNode selExprList) throws SemanticException { //(z,x) http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/queries/clientpositive/cbo_rp_insert.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/cbo_rp_insert.q b/ql/src/test/queries/clientpositive/cbo_rp_insert.q new file mode 100644 index 0000000..eeaeec2 --- /dev/null +++ b/ql/src/test/queries/clientpositive/cbo_rp_insert.q @@ -0,0 +1,17 @@ +set hive.cbo.enable=true; +set hive.cbo.returnpath.hiveop=true; + +drop database if exists x314 cascade; +create database x314; +use x314; +create table source(s1 int, s2 int); +create table target1(x int, y int, z int); + +insert into source(s2,s1) values(2,1); +-- expect source to contain 1 row (1,2) +select * from source; +insert into target1(z,x) select * from source; +-- expect target1 to contain 1 row (2,NULL,1) +select * from target1; + +drop database if exists x314 cascade; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/6df52edc/ql/src/test/results/clientpositive/cbo_rp_insert.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out new file mode 100644 index 0000000..6428a4b --- /dev/null +++ b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: drop database if exists x314 cascade +PREHOOK: type: DROPDATABASE +POSTHOOK: query: drop database if exists x314 cascade +POSTHOOK: type: DROPDATABASE +PREHOOK: query: create database x314 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:x314 +POSTHOOK: query: create database x314 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:x314 +PREHOOK: query: use x314 +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:x314 +POSTHOOK: query: use x314 +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:x314 +PREHOOK: query: create table source(s1 int, s2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@source +POSTHOOK: query: create table source(s1 int, s2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@source +PREHOOK: query: create table target1(x int, y int, z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@target1 +POSTHOOK: query: create table target1(x int, y int, z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@target1 +PREHOOK: query: insert into source(s2,s1) values(2,1) +PREHOOK: type: QUERY +PREHOOK: Input: x314@values__tmp__table__1 +PREHOOK: Output: x314@source +POSTHOOK: query: insert into source(s2,s1) values(2,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@values__tmp__table__1 +POSTHOOK: Output: x314@source +POSTHOOK: Lineage: source.s1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: source.s2 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: -- expect source to contain 1 row (1,2) +select * from source +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +#### A masked pattern was here #### +POSTHOOK: query: -- expect source to contain 1 row (1,2) +select * from source +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +#### A masked pattern was here #### +1 2 +PREHOOK: query: insert into target1(z,x) select * from source +PREHOOK: type: QUERY +PREHOOK: Input: x314@source +PREHOOK: Output: x314@target1 +POSTHOOK: query: insert into target1(z,x) select * from source +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@source +POSTHOOK: Output: x314@target1 +POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] +POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] +PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1) +select * from target1 +PREHOOK: type: QUERY +PREHOOK: Input: x314@target1 +#### A masked pattern was here #### +POSTHOOK: query: -- expect target1 to contain 1 row (2,NULL,1) +select * from target1 +POSTHOOK: type: QUERY +POSTHOOK: Input: x314@target1 +#### A masked pattern was here #### +2 NULL 1 +PREHOOK: query: drop database if exists x314 cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:x314 +PREHOOK: Output: database:x314 +PREHOOK: Output: x314@source +PREHOOK: Output: x314@target1 +PREHOOK: Output: x314@values__tmp__table__1 +POSTHOOK: query: drop database if exists x314 cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:x314 +POSTHOOK: Output: database:x314 +POSTHOOK: Output: x314@source +POSTHOOK: Output: x314@target1 +POSTHOOK: Output: x314@values__tmp__table__1