Repository: hive
Updated Branches:
  refs/heads/master 62130b6c3 -> 74ca2eedf


HIVE-15983: Support the named columns join (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/74ca2eed
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/74ca2eed
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/74ca2eed

Branch: refs/heads/master
Commit: 74ca2eedf838df30414a583df8139fe4d87b4a7d
Parents: 62130b6
Author: Pengcheng Xiong <[email protected]>
Authored: Thu Mar 16 10:30:48 2017 -0700
Committer: Pengcheng Xiong <[email protected]>
Committed: Thu Mar 16 10:30:48 2017 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  48 +-
 .../hadoop/hive/ql/parse/FromClauseParser.g     |   3 +-
 .../hadoop/hive/ql/parse/NamedJoinInfo.java     |  65 +++
 .../hadoop/hive/ql/parse/RowResolver.java       |  10 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  94 +++-
 .../queries/clientpositive/named_column_join.q  |  52 ++
 .../clientpositive/named_column_join.q.out      | 482 +++++++++++++++++++
 7 files changed, 734 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
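
For context: this patch adds the SQL-standard named columns join (the USING clause) to Hive's parser and planner. A minimal sketch of the new syntax, assuming the table t (a int, b int) created in the test below (illustrative, not part of the patch):

    -- USING joins on the listed column(s) and emits each join column once:
    select * from t t1 join t t2 using (a);      -- columns: a, t1.b, t2.b
    -- the equivalent ON form keeps the join column from both sides:
    select * from t t1 join t t2 on t1.a = t2.a; -- columns: t1.a, t1.b, t2.a, t2.b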


http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 34a6dd4..fc7d510 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -205,6 +205,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewFilterScanRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTBuilder;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory;
@@ -1907,7 +1908,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       return setOpRel;
     }
 
-    private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJoinType,
+    private RelNode genJoinRelNode(RelNode leftRel, String leftTableAlias, RelNode rightRel, String rightTableAlias, JoinType hiveJoinType,
         ASTNode joinCond) throws SemanticException {
 
       RowResolver leftRR = this.relToHiveRR.get(leftRel);
@@ -1915,10 +1916,38 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 1. Construct ExpressionNodeDesc representing Join Condition
       RexNode calciteJoinCond = null;
+      List<String> namedColumns = null;
       if (joinCond != null) {
         JoinTypeCheckCtx jCtx = new JoinTypeCheckCtx(leftRR, rightRR, hiveJoinType);
         RowResolver input = RowResolver.getCombinedRR(leftRR, rightRR);
-        if (unparseTranslator != null && unparseTranslator.isEnabled()) {
+        // named columns join
+        // TODO: we could do the same for semi joins, but other DBMSs do not
+        // seem to support it yet.
+        if (joinCond.getType() == HiveParser.TOK_TABCOLNAME
+            && !hiveJoinType.equals(JoinType.LEFTSEMI)) {
+          namedColumns = new ArrayList<>();
+          // We will transform the USING clause to make it look like an ON
+          // clause, so let's generate a valid ON-clause AST from it.
+          ASTNode and = (ASTNode) ParseDriver.adaptor.create(HiveParser.KW_AND, "and");
+          ASTNode equal = null;
+          int count = 0;
+          for (Node child : joinCond.getChildren()) {
+            String columnName = ((ASTNode) child).getText();
+            // dealing with views
+            if (unparseTranslator != null && unparseTranslator.isEnabled()) {
+              unparseTranslator.addIdentifierTranslation((ASTNode) child);
+            }
+            namedColumns.add(columnName);
+            ASTNode left = ASTBuilder.qualifiedName(leftTableAlias, columnName);
+            ASTNode right = ASTBuilder.qualifiedName(rightTableAlias, columnName);
+            equal = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "=");
+            ParseDriver.adaptor.addChild(equal, left);
+            ParseDriver.adaptor.addChild(equal, right);
+            ParseDriver.adaptor.addChild(and, equal);
+            count++;
+          }
+          joinCond = count > 1 ? and : equal;
+        } else if (unparseTranslator != null && unparseTranslator.isEnabled()) {
           genAllExprNodeDesc(joinCond, input, jCtx);
         }
         Map<ASTNode, ExprNodeDesc> exprNodes = JoinCondTypeCheckProcFactory.genExprNode(joinCond,
@@ -2032,12 +2061,17 @@ public class CalcitePlanner extends SemanticAnalyzer {
       } else {
         topRel = HiveJoin.getJoin(cluster, leftRel, rightRel, calciteJoinCond, calciteJoinType);
         topRR = RowResolver.getCombinedRR(leftRR, rightRR);
+        if (namedColumns != null) {
+          List<String> tableAliases = new ArrayList<>();
+          tableAliases.add(leftTableAlias);
+          tableAliases.add(rightTableAlias);
+          topRR.setNamedJoinInfo(new NamedJoinInfo(tableAliases, namedColumns, hiveJoinType));
+        }
       }
 
       // 4. Add new rel & its RR to the maps
       relToHiveColNameCalcitePosMap.put(topRel, this.buildHiveToCalciteColumnMap(topRR, topRel));
       relToHiveRR.put(topRel, topRR);
-
       return topRel;
     }
 
@@ -2086,12 +2120,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 2. Get Left Table Alias
       ASTNode left = (ASTNode) joinParseTree.getChild(0);
+      String leftTableAlias = null;
       if ((left.getToken().getType() == HiveParser.TOK_TABREF)
           || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
           || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
         String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
             (ASTNode) left.getChild(0)).toLowerCase();
-        String leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer
+        leftTableAlias = left.getChildCount() == 1 ? tableName : SemanticAnalyzer
            .unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
         // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
         // partitionTableFunctionSource partitioningSpec? expression*)
@@ -2107,12 +2142,13 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // 3. Get Right Table Alias
       ASTNode right = (ASTNode) joinParseTree.getChild(1);
+      String rightTableAlias = null;
       if ((right.getToken().getType() == HiveParser.TOK_TABREF)
           || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
           || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
         String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName(
             (ASTNode) right.getChild(0)).toLowerCase();
-        String rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer
+        rightTableAlias = right.getChildCount() == 1 ? tableName : SemanticAnalyzer
            .unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
         // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
         // partitionTableFunctionSource partitioningSpec? expression*)
@@ -2128,7 +2164,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       ASTNode joinCond = (ASTNode) joinParseTree.getChild(2);
 
       // 5. Create Join rel
-      return genJoinRelNode(leftRel, rightRel, hiveJoinType, joinCond);
+      return genJoinRelNode(leftRel, leftTableAlias, rightRel, rightTableAlias, hiveJoinType, joinCond);
     }
 
    private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
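
The net effect of the genJoinRelNode changes above can be read as a source-level rewrite; a sketch of what the generated ON-clause AST corresponds to (illustrative only; the transformation operates on ASTNodes, not on query text):

    -- input:
    select * from t t1 join t t2 using (a, b);
    -- internally treated as the join condition:
    --   ... from t t1 join t t2 on t1.a = t2.a and t1.b = t2.b
    -- with a single USING column, the lone EQUAL node is used directly
    -- (the "count > 1 ? and : equal" branch above).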

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index 558ca7f..00109f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -110,10 +110,9 @@ atomjoinSource
 
 joinSource
     :
-    atomjoinSource (joinToken^ joinSourcePart (KW_ON! expression {$joinToken.start.getType() != COMMA}?)?)*
+    atomjoinSource (joinToken^ joinSourcePart (KW_ON! expression {$joinToken.start.getType() != COMMA}? | KW_USING! columnParenthesesList {$joinToken.start.getType() != COMMA}?)?)*
     ;
 
-
 joinSourcePart
 @init { gParent.pushMsg("joinSourcePart", state); }
 @after { gParent.popMsg(state); }
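
With the revised joinSource production, a join source may be followed by either an ON expression or a USING column list (comma joins still accept neither). For example, both of the following now parse (a small sketch over the same table t used in the tests):

    select * from t t1 join t t2 on (t1.a = t2.a);
    select * from t t1 join t t2 using (a);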

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/java/org/apache/hadoop/hive/ql/parse/NamedJoinInfo.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/NamedJoinInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/NamedJoinInfo.java
new file mode 100644
index 0000000..62ef939
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/NamedJoinInfo.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+
+/**
+ * NamedJoinInfo encapsulates information about the USING clause of a join,
+ * e.g., select * from a join b using(c). It describes tables a and b, column
+ * c, and the join type.
+ *
+ */
+public class NamedJoinInfo {
+  private List<String> tableAliases;
+  private List<String> namedColumns;
+  private JoinType hiveJoinType;
+
+  public NamedJoinInfo(List<String> aliases, List<String> namedColumns, JoinType hiveJoinType) {
+    super();
+    this.tableAliases = aliases;
+    this.namedColumns = namedColumns;
+    this.hiveJoinType = hiveJoinType;
+  }
+
+  public List<String> getAliases() {
+    return tableAliases;
+  }
+
+  public void setAliases(List<String> aliases) {
+    this.tableAliases = aliases;
+  }
+
+  public List<String> getNamedColumns() {
+    return namedColumns;
+  }
+
+  public void setNamedColumns(List<String> namedColumns) {
+    this.namedColumns = namedColumns;
+  }
+
+  public JoinType getHiveJoinType() {
+    return hiveJoinType;
+  }
+
+  public void setHiveJoinType(JoinType hiveJoinType) {
+    this.hiveJoinType = hiveJoinType;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
index e14f1cf..262dafb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
@@ -56,6 +56,8 @@ public class RowResolver implements Serializable{
 
  private static final Logger LOG = LoggerFactory.getLogger(RowResolver.class.getName());
 
+  private NamedJoinInfo namedJoinInfo;
+
   public RowResolver() {
     rowSchema = new RowSchema();
     rslvMap = new LinkedHashMap<String, LinkedHashMap<String, ColumnInfo>>();
@@ -469,4 +471,12 @@ public class RowResolver implements Serializable{
   private HashMap<String, String[]> getInvRslvMap() {
    return invRslvMap; // If making this public, note that its ordering is undefined.
   }
+
+  public NamedJoinInfo getNamedJoinInfo() {
+    return namedJoinInfo;
+  }
+
+  public void setNamedJoinInfo(NamedJoinInfo namedJoinInfo) {
+    this.namedJoinInfo = namedJoinInfo;
+  }
 }
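
For orientation: the NamedJoinInfo attached to the RowResolver here is what the select-* expansion in SemanticAnalyzer (next file) consults. Reading the code above, for a query such as the following it would carry the aliases [t1, t2], the named column [a], and the inner join type (a hedged reading of the patch, not stated in it):

    select * from t t1 join t t2 using (a);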

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 0732207..f2a6ade 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -3427,6 +3427,69 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     Map<ColumnInfo, ColumnInfo> inputColsProcessed = new HashMap<ColumnInfo, ColumnInfo>();
     // For expr "*", aliases should be iterated in the order they are specified
     // in the query.
+
+    if (colSrcRR.getNamedJoinInfo() != null) {
+      // We got a USING clause in the previous join. We need to generate the
+      // select list per the standard: for *, the join columns come first,
+      // non-repeated, followed by the other columns.
+      HashMap<String, ColumnInfo> leftMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(0));
+      HashMap<String, ColumnInfo> rightMap = colSrcRR.getFieldMap(colSrcRR.getNamedJoinInfo().getAliases().get(1));
+      HashMap<String, ColumnInfo> chosenMap = null;
+      if (colSrcRR.getNamedJoinInfo().getHiveJoinType() != JoinType.RIGHTOUTER) {
+        chosenMap = leftMap;
+      } else {
+        chosenMap = rightMap;
+      }
+      // first get the columns in named columns
+      for (String columnName : colSrcRR.getNamedJoinInfo().getNamedColumns()) {
+        for (Map.Entry<String, ColumnInfo> entry : chosenMap.entrySet()) {
+          ColumnInfo colInfo = entry.getValue();
+          if (!columnName.equals(colInfo.getAlias())) {
+            continue;
+          }
+          String name = colInfo.getInternalName();
+          String[] tmp = colSrcRR.reverseLookup(name);
+
+          // Skip the colinfos which are not for this particular alias
+          if (tabAlias != null && !tmp[0].equalsIgnoreCase(tabAlias)) {
+            continue;
+          }
+
+          if (colInfo.getIsVirtualCol() && colInfo.isHiddenVirtualCol()) {
+            continue;
+          }
+          ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
+          if (oColInfo == null) {
+            ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
+                colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
+            col_list.add(expr);
+            oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
+                colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+            inputColsProcessed.put(colInfo, oColInfo);
+          }
+          if (ensureUniqueCols) {
+            if (!output.putWithCheck(tmp[0], tmp[1], null, oColInfo)) {
+              throw new CalciteSemanticException("Cannot add column to RR: " + tmp[0] + "."
+                  + tmp[1] + " => " + oColInfo + " due to duplication, see previous warnings",
+                  UnsupportedFeature.Duplicates_in_RR);
+            }
+          } else {
+            output.put(tmp[0], tmp[1], oColInfo);
+          }
+          pos = Integer.valueOf(pos.intValue() + 1);
+          matched++;
+
+          if (unparseTranslator.isEnabled() || tableMask.isEnabled()) {
+            if (replacementText.length() > 0) {
+              replacementText.append(", ");
+            }
+            replacementText.append(HiveUtils.unparseIdentifier(tmp[0], conf));
+            replacementText.append(".");
+            replacementText.append(HiveUtils.unparseIdentifier(tmp[1], conf));
+          }
+        }
+      }
+    }
     for (String alias : aliases) {
       HashMap<String, ColumnInfo> fMap = colSrcRR.getFieldMap(alias);
       if (fMap == null) {
@@ -3436,12 +3499,19 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       // from the input schema
       for (Map.Entry<String, ColumnInfo> entry : fMap.entrySet()) {
         ColumnInfo colInfo = entry.getValue();
+        if (colSrcRR.getNamedJoinInfo() != null && colSrcRR.getNamedJoinInfo().getNamedColumns().contains(colInfo.getAlias())) {
+          // we already added this column to the select list.
+          continue;
+        }
         if (excludeCols != null && excludeCols.contains(colInfo)) {
          continue; // This was added during plan generation.
         }
-        // First, look up the column from the source against which * is to be resolved.
-        // We'd later translated this into the column from proper input, if it's valid.
-        // TODO: excludeCols may be possible to remove using the same technique.
+        // First, look up the column from the source against which * is to be
+        // resolved.
+        // We'd later translate this into the column from the proper input, if
+        // it's valid.
+        // TODO: excludeCols may be possible to remove using the same
+        // technique.
         String name = colInfo.getInternalName();
         String[] tmp = colSrcRR.reverseLookup(name);
 
@@ -3459,13 +3529,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           continue;
         }
 
-        // If input (GBY) is different than the source of columns, find the same column in input.
+        // If input (GBY) is different than the source of columns, find the
+        // same column in input.
         // TODO: This is fraught with peril.
         if (input != colSrcRR) {
           colInfo = input.get(tabAlias, tmp[1]);
           if (colInfo == null) {
-            LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1]
-                + ", derived from [" + colSrcRR + "], in [" + input + "]");
+            LOG.error("Cannot find colInfo for " + tabAlias + "." + tmp[1] + ", derived from ["
+                + colSrcRR + "], in [" + input + "]");
             throw new SemanticException(ErrorMsg.NON_KEY_EXPR_IN_GROUPBY, tmp[1]);
           }
           String oldCol = null;
@@ -3482,13 +3553,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
         ColumnInfo oColInfo = inputColsProcessed.get(colInfo);
         if (oColInfo == null) {
-          ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(),
-              name, colInfo.getTabAlias(), colInfo.getIsVirtualCol(),
-              colInfo.isSkewedCol());
+          ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), name,
+              colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isSkewedCol());
           col_list.add(expr);
-          oColInfo = new ColumnInfo(getColumnInternalName(pos),
-              colInfo.getType(), colInfo.getTabAlias(),
-              colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
+          oColInfo = new ColumnInfo(getColumnInternalName(pos), colInfo.getType(),
+              colInfo.getTabAlias(), colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
           inputColsProcessed.put(colInfo, oColInfo);
         }
         if (ensureUniqueCols) {
@@ -3513,6 +3582,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         }
       }
     }
+
     if (matched == 0) {
       throw new SemanticException(ErrorMsg.INVALID_COLUMN.getMsg(sel));
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/test/queries/clientpositive/named_column_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/named_column_join.q b/ql/src/test/queries/clientpositive/named_column_join.q
new file mode 100644
index 0000000..6de9325
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/named_column_join.q
@@ -0,0 +1,52 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+create table t (a int, b int);
+
+insert into t values (1,2),(2,1),(3,4),(4,3),(3,3),(null,null),(null,1),(2,null);
+
+explain select * from t t1 join t t2 using (a);
+
+select * from t t1 join t t2 using (a);
+
+select * from t t1 join t t2 using (a,b);
+
+select t1.a,t2.b,t1.b,t2.a from t t1 join t t2 using (a);
+
+select * from t t1 left outer join t t2 using (a,b);
+
+select t1.a,t1.b from t t1 right outer join t t2 on (t1.a=t2.a and t1.b=t2.b);
+
+select * from t t1 right outer join t t2 using (a,b);
+
+select * from t t1 inner join t t2 using (a,b);
+
+select * from t t1 left outer join t t2 using (b);
+
+select * from t t1 right outer join t t2 using (b);
+
+select * from t t1 inner join t t2 using (b);
+
+drop view v;
+
+create view v as select * from t t1 join t t2 using (a,b);
+
+desc formatted v;
+
+select * from v;
+
+drop view v;
+
+create view v as select * from t t1 right outer join t t2 using (b,a);
+
+desc formatted v;
+
+select * from v;
+
+select * from (select t1.b b from t t1 inner join t t2 using (b)) t3 join t t4 using(b);
+
+select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a);
+
+create table tt as select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a);
+
+desc formatted tt;

http://git-wip-us.apache.org/repos/asf/hive/blob/74ca2eed/ql/src/test/results/clientpositive/named_column_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/named_column_join.q.out b/ql/src/test/results/clientpositive/named_column_join.q.out
new file mode 100644
index 0000000..031081a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/named_column_join.q.out
@@ -0,0 +1,482 @@
+PREHOOK: query: create table t (a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values (1,2),(2,1),(3,4),(4,3),(3,3),(null,null),(null,1),(2,null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (1,2),(2,1),(3,4),(4,3),(3,3),(null,null),(null,1),(2,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain select * from t t1 join t t2 using (a)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t t1 join t t2 using (a)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a is not null (type: boolean)
+              Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int), b (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
+          TableScan
+            alias: t2
+            Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a is not null (type: boolean)
+              Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int), b (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 8 Data size: 28 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: int)
+            1 _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col3
+          Statistics: Num rows: 8 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 8 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 8 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from t t1 join t t2 using (a)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 join t t2 using (a)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2       2
+2      1       1
+2      1       NULL
+2      NULL    1
+2      NULL    NULL
+3      3       3
+3      3       4
+3      4       3
+3      4       4
+4      3       3
+PREHOOK: query: select * from t t1 join t t2 using (a,b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 join t t2 using (a,b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+3      3
+3      4
+4      3
+PREHOOK: query: select t1.a,t2.b,t1.b,t2.a from t t1 join t t2 using (a)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.a,t2.b,t1.b,t2.a from t t1 join t t2 using (a)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2       2       1
+2      1       1       2
+2      1       NULL    2
+2      NULL    1       2
+2      NULL    NULL    2
+3      3       3       3
+3      3       4       3
+3      4       3       3
+3      4       4       3
+4      3       3       4
+PREHOOK: query: select * from t t1 left outer join t t2 using (a,b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 left outer join t t2 using (a,b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+2      NULL
+3      3
+3      4
+4      3
+NULL   1
+NULL   NULL
+PREHOOK: query: select t1.a,t1.b from t t1 right outer join t t2 on (t1.a=t2.a and t1.b=t2.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select t1.a,t1.b from t t1 right outer join t t2 on (t1.a=t2.a and t1.b=t2.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+3      3
+3      4
+4      3
+NULL   NULL
+NULL   NULL
+NULL   NULL
+PREHOOK: query: select * from t t1 right outer join t t2 using (a,b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 right outer join t t2 using (a,b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+2      NULL
+3      3
+3      4
+4      3
+NULL   1
+NULL   NULL
+PREHOOK: query: select * from t t1 inner join t t2 using (a,b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 inner join t t2 using (a,b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+3      3
+3      4
+4      3
+PREHOOK: query: select * from t t1 left outer join t t2 using (b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 left outer join t t2 using (b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2       2
+1      2       NULL
+1      NULL    2
+1      NULL    NULL
+2      1       1
+3      3       3
+3      3       4
+3      4       3
+3      4       4
+4      3       3
+NULL   2       NULL
+NULL   NULL    NULL
+PREHOOK: query: select * from t t1 right outer join t t2 using (b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 right outer join t t2 using (b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2       2
+1      2       NULL
+1      NULL    2
+1      NULL    NULL
+2      1       1
+3      3       3
+3      3       4
+3      4       3
+3      4       4
+4      3       3
+NULL   NULL    2
+NULL   NULL    NULL
+PREHOOK: query: select * from t t1 inner join t t2 using (b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t t1 inner join t t2 using (b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2       2
+1      2       NULL
+1      NULL    2
+1      NULL    NULL
+2      1       1
+3      3       3
+3      3       4
+3      4       3
+3      4       4
+4      3       3
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: create view v as select * from t t1 join t t2 using (a,b)
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as select * from t t1 join t t2 using (a,b)
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+POSTHOOK: Lineage: v.a SIMPLE [(t)t1.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: v.b SIMPLE [(t)t1.FieldSchema(name:b, type:int, comment:null), ]
+PREHOOK: query: desc formatted v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: desc formatted v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+# col_name             data_type               comment             
+                
+a                      int                                         
+b                      int                                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+Table Type:            VIRTUAL_VIEW             
+Table Parameters:               
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         null                     
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+                
+# View Information              
+View Original Text:    select * from t t1 join t t2 using (a,b)         
+View Expanded Text:    select `t1`.`a`, `t1`.`b` from `default`.`t` `t1` join `default`.`t` `t2` using (`a`,`b`)
+View Rewrite Enabled:  No                       
+PREHOOK: query: select * from v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+1      2
+2      1
+3      3
+3      4
+4      3
+PREHOOK: query: drop view v
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@v
+PREHOOK: Output: default@v
+POSTHOOK: query: drop view v
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@v
+POSTHOOK: Output: default@v
+PREHOOK: query: create view v as select * from t t1 right outer join t t2 using (b,a)
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@v
+POSTHOOK: query: create view v as select * from t t1 right outer join t t2 using (b,a)
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@v
+POSTHOOK: Lineage: v.a SIMPLE [(t)t2.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: v.b SIMPLE [(t)t2.FieldSchema(name:b, type:int, comment:null), ]
+PREHOOK: query: desc formatted v
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@v
+POSTHOOK: query: desc formatted v
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@v
+# col_name             data_type               comment             
+                
+b                      int                                         
+a                      int                                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+Table Type:            VIRTUAL_VIEW             
+Table Parameters:               
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         null                     
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+                
+# View Information              
+View Original Text:    select * from t t1 right outer join t t2 using (b,a)
+View Expanded Text:    select `t2`.`b`, `t2`.`a` from `default`.`t` `t1` right outer join `default`.`t` `t2` using (`b`,`a`)
+View Rewrite Enabled:  No                       
+PREHOOK: query: select * from v
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@v
+#### A masked pattern was here ####
+POSTHOOK: query: select * from v
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@v
+#### A masked pattern was here ####
+1      2
+1      NULL
+2      1
+3      3
+3      4
+4      3
+NULL   2
+NULL   NULL
+PREHOOK: query: select * from (select t1.b b from t t1 inner join t t2 using (b)) t3 join t t4 using(b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select t1.b b from t t1 inner join t t2 using (b)) t3 join t t4 using(b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+1      2
+1      2
+1      2
+1      NULL
+1      NULL
+1      NULL
+1      NULL
+2      1
+3      3
+3      3
+3      3
+3      3
+3      4
+3      4
+3      4
+3      4
+4      3
+PREHOOK: query: select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1      2
+2      1
+2      1
+2      NULL
+2      NULL
+3      3
+3      3
+3      3
+3      4
+3      4
+3      4
+4      3
+4      3
+PREHOOK: query: create table tt as select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a)
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tt
+POSTHOOK: query: create table tt as select * from (select t2.a a from t t1 inner join t t2 using (b)) t3 join t t4 using(a)
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tt
+POSTHOOK: Lineage: tt.a SIMPLE [(t)t2.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: tt.b SIMPLE [(t)t4.FieldSchema(name:b, type:int, comment:null), ]
+PREHOOK: query: desc formatted tt
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tt
+POSTHOOK: query: desc formatted tt
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tt
+# col_name             data_type               comment             
+                
+a                      int                                         
+b                      int                                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
+       numFiles                1                   
+       numRows                 13                  
+       rawDataSize             41                  
+       totalSize               54                  
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
