Copilot commented on code in PR #6488:
URL: https://github.com/apache/hive/pull/6488#discussion_r3332008336


##########
ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java:
##########
@@ -713,158 +714,178 @@ public Object process(Node nd, Stack<Node> stack, 
NodeProcessorCtx procCtx, Obje
       Operator<? extends OperatorDesc> inpOp = getParent(stack);
       lCtx.getIndex().copyPredicates(inpOp, op);
 
-      Dependency dep = new Dependency();
-      DependencyType newType = DependencyType.EXPRESSION;
-      dep.setType(newType);
-
-      Set<String> columns = new HashSet<>();
       PartitionedTableFunctionDef funcDef = op.getConf().getFuncDef();
-      StringBuilder sb = new StringBuilder();
-      WindowFrameDef windowFrameDef = null;
-
-      if (!(funcDef.getTFunction() instanceof Noop)) {
-
-        if (funcDef instanceof WindowTableFunctionDef) {
-          // function name
-          WindowFunctionDef windowFunctionDef = ((WindowTableFunctionDef) 
funcDef).getWindowFunctions().getFirst();
-          sb.append(windowFunctionDef.getName()).append("(");
+      int numWinFns = 1;
+      if (funcDef instanceof WindowTableFunctionDef) {
+        numWinFns = ((WindowTableFunctionDef) 
funcDef).getWindowFunctions().size();
+      }
+      List<Dependency> windowDeps = new ArrayList<>(numWinFns);
 
-          addArgs(sb, columns, lCtx, inpOp, op.getSchema(), 
windowFunctionDef.getArgs());
+      for (int winIdx = 0; winIdx < numWinFns; winIdx++) {
+        Dependency dep = new Dependency();
+        DependencyType newType = DependencyType.EXPRESSION;
+        dep.setType(newType);
 
-          windowFrameDef = windowFunctionDef.getWindowFrame();
+        Set<String> columns = new HashSet<>();
+        StringBuilder sb = new StringBuilder();
+        WindowFrameDef windowFrameDef = null;
 
-          if (sb.charAt(sb.length() - 2) == ',') {
-            sb.delete(sb.length() - 2, sb.length());
-          }
-          sb.append(")");
-          sb.append(" over (");
-        } else /* PartitionedTableFunctionDef */ {
-          // function name
-          sb.append(funcDef.getName()).append("(");
-          addArgs(sb, columns, lCtx, inpOp, 
funcDef.getRawInputShape().getRr().getRowSchema(), funcDef.getArgs());
+        if (!(funcDef.getTFunction() instanceof Noop)) {
 
-          // matchpath has argument pattern like matchpath(<input expression>, 
<argument methods: arg1(), arg2()...>)
-          if (funcDef.getInput() != null) {
-            sb.append("on ").append(funcDef.getInput().getAlias()).append(" ");
+          if (funcDef instanceof WindowTableFunctionDef) {
+            // function name
+            WindowFunctionDef windowFunctionDef = ((WindowTableFunctionDef) 
funcDef).getWindowFunctions().get(winIdx);
+            sb.append(windowFunctionDef.getName()).append("(");
 
-            int counter = 1;
-            for (PTFExpressionDef arg : funcDef.getArgs()) {
-              ExprNodeDesc exprNode = arg.getExprNode();
+            addArgs(sb, columns, lCtx, inpOp, op.getSchema(), 
windowFunctionDef.getArgs());
 
-              addIfNotNull(columns, exprNode.getCols());
+            windowFrameDef = windowFunctionDef.getWindowFrame();
 
-              sb.append("arg").append(counter++).append("(");
-              
sb.append(ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 arg.getExprNode(), lCtx, inpOp, null));
-              sb.append("), ");
+            if (sb.charAt(sb.length() - 2) == ',') {
+              sb.delete(sb.length() - 2, sb.length());
             }
+            sb.append(")");
+            sb.append(" over (");
+          } else /* PartitionedTableFunctionDef */ {
+            // function name
+            sb.append(funcDef.getName()).append("(");
+            addArgs(sb, columns, lCtx, inpOp, 
funcDef.getRawInputShape().getRr().getRowSchema(), funcDef.getArgs());
+
+            // matchpath has argument pattern like matchpath(<input 
expression>, <argument methods: arg1(), arg2()...>)
+            if (funcDef.getInput() != null) {
+              sb.append("on ").append(funcDef.getInput().getAlias()).append(" 
");
+
+              int counter = 1;
+              for (PTFExpressionDef arg : funcDef.getArgs()) {
+                ExprNodeDesc exprNode = arg.getExprNode();
+
+                addIfNotNull(columns, exprNode.getCols());
+
+                sb.append("arg").append(counter++).append("(");
+                sb.append(
+                    
ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 arg.getExprNode(),
+                        lCtx, inpOp, null));
+                sb.append("), ");
+              }
 
-            sb.delete(sb.length() - 2, sb.length());
+              sb.delete(sb.length() - 2, sb.length());
+            }
           }
         }
-      }
 
       /*
         Collect partition by and distribute by information.
         Please note, at the expression node level, there is no difference 
between those.
         That means distribute by gets a string partition by in the expression 
string.
        */
-      if (funcDef.getPartition() != null ) {
-        List<PTFExpressionDef> partitionExpressions = 
funcDef.getPartition().getExpressions();
+        if (funcDef.getPartition() != null) {
+          List<PTFExpressionDef> partitionExpressions = 
funcDef.getPartition().getExpressions();
 
-        boolean isPartitionByAdded = false;
-        for (PTFExpressionDef partitionExpr : partitionExpressions) {
-          ExprNodeDesc partitionExprNode = partitionExpr.getExprNode();
+          boolean isPartitionByAdded = false;
+          for (PTFExpressionDef partitionExpr : partitionExpressions) {
+            ExprNodeDesc partitionExprNode = partitionExpr.getExprNode();
 
-          if (partitionExprNode.getCols() != null && 
!partitionExprNode.getCols().isEmpty()) {
-            if (!isPartitionByAdded) {
-              sb.append("partition by ");
-              isPartitionByAdded = true;
-            }
+            if (partitionExprNode.getCols() != null && 
!partitionExprNode.getCols().isEmpty()) {
+              if (!isPartitionByAdded) {
+                sb.append("partition by ");
+                isPartitionByAdded = true;
+              }
 
-            addIfNotNull(columns, partitionExprNode.getCols());
+              addIfNotNull(columns, partitionExprNode.getCols());
 
-            if (partitionExprNode instanceof ExprNodeColumnDesc) {
-              
sb.append(ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 partitionExprNode, lCtx, inpOp, null));
-              sb.append(", ");
-            }
+              if (partitionExprNode instanceof ExprNodeColumnDesc) {
+                sb.append(
+                    
ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 partitionExprNode,
+                        lCtx, inpOp, null));
+                sb.append(", ");
+              }
 
-            sb.delete(sb.length() - 2, sb.length());
+              sb.delete(sb.length() - 2, sb.length());
+            }
           }
-        }
 
-      }
+        }
 
       /*
         Collects the order by and sort by information.
         Please note, at the expression node level, there is no difference 
between those.
         That means sort by gets a string partition by in the expression string.
        */
-      if (funcDef.getOrder() != null) {
+        if (funcDef.getOrder() != null) {
         /*
         Order by is sometimes added by the compiler to make the PTF call 
deterministic.
         At this point of the code execution, we don't know if it is added by 
the compiler or
         it was originally part of the query string.
         */
-        List<OrderExpressionDef> orderExpressions = 
funcDef.getOrder().getExpressions();
+          List<OrderExpressionDef> orderExpressions = 
funcDef.getOrder().getExpressions();
 
-        if (!sb.isEmpty() && sb.charAt(sb.length() - 1) != '(') {
-          sb.append(" ");
-        }
-        sb.append("order by ");
+          if (!sb.isEmpty() && sb.charAt(sb.length() - 1) != '(') {
+            sb.append(" ");
+          }
+          sb.append("order by ");
 
-        for (OrderExpressionDef orderExpr : orderExpressions) {
-          ExprNodeDesc orderExprNode = orderExpr.getExprNode();
-          addIfNotNull(columns, orderExprNode.getCols());
+          for (OrderExpressionDef orderExpr : orderExpressions) {
+            ExprNodeDesc orderExprNode = orderExpr.getExprNode();
+            addIfNotNull(columns, orderExprNode.getCols());
 
-          
sb.append(ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 orderExprNode, lCtx, inpOp, null));
-          if (PTFInvocationSpec.Order.DESC.equals(orderExpr.getOrder())) {
-            sb.append(" desc");
+            sb.append(
+                
ExprProcFactory.getExprString(funcDef.getRawInputShape().getRr().getRowSchema(),
 orderExprNode, lCtx,
+                    inpOp, null));
+            if (PTFInvocationSpec.Order.DESC.equals(orderExpr.getOrder())) {
+              sb.append(" desc");
+            }
+            sb.append(", ");
           }
-          sb.append(", ");
-        }
 
-        sb.delete(sb.length() - 2, sb.length());
-      }
+          sb.delete(sb.length() - 2, sb.length());
+        }
 
       /*
       Window frame is sometimes added by the compiler to make the PTF call 
deterministic.
       At this point of the code execution, we don't know if it is added by the 
compiler or
       it was originally part of the query string.
       */
-      if (windowFrameDef != null) {
-        sb.append(" ").append(windowFrameDef.getWindowType()).append(" between 
");
+        if (windowFrameDef != null) {
+          sb.append(" ").append(windowFrameDef.getWindowType()).append(" 
between ");
 
-        appendBoundary(windowFrameDef.getStart(), sb, " preceding");
+          appendBoundary(windowFrameDef.getStart(), sb, " preceding");
 
-        sb.append(" and ");
+          sb.append(" and ");
 
-        appendBoundary(windowFrameDef.getEnd(), sb, " following");
-      }
+          appendBoundary(windowFrameDef.getEnd(), sb, " following");
+        }
 
-      sb.append(")");
-      dep.setExpr(sb.toString());
+        sb.append(")");
+        dep.setExpr(sb.toString());
 
-      LinkedHashSet<BaseColumnInfo> colSet = new LinkedHashSet<>();
-      for(ColumnInfo ci : inpOp.getSchema().getSignature()) {
-        Dependency d = lCtx.getIndex().getDependency(inpOp, ci);
-        if (d != null) {
-          newType = LineageCtx.getNewDependencyType(d.getType(), newType);
-          if (!ci.isHiddenVirtualCol() && 
columns.contains(ci.getInternalName())) {
-            colSet.addAll(d.getBaseCols());
+        LinkedHashSet<BaseColumnInfo> colSet = new LinkedHashSet<>();
+        for (ColumnInfo ci : inpOp.getSchema().getSignature()) {
+          Dependency d = lCtx.getIndex().getDependency(inpOp, ci);
+          if (d != null) {
+            newType = LineageCtx.getNewDependencyType(d.getType(), newType);
+            if (!ci.isHiddenVirtualCol() && 
columns.contains(ci.getInternalName())) {
+              colSet.addAll(d.getBaseCols());
+            }
           }
         }
-      }
 
-      dep.setType(newType);
-      dep.setBaseCols(colSet);
+        dep.setType(newType);
+        dep.setBaseCols(colSet);
+        windowDeps.add(dep);
+      }
 
       // This dependency is then set for all the colinfos of the script 
operator
+      int windowIdx = 0;
       for(ColumnInfo ci : op.getSchema().getSignature()) {
-        Dependency d = dep;
-          Dependency depCi = lCtx.getIndex().getDependency(inpOp, ci);
-          if (depCi != null) {
-            d = depCi;
-          }
+        Dependency d = null;
+        Dependency depCi = lCtx.getIndex().getDependency(inpOp, ci);
+        if (depCi != null) {
+          d = depCi;

Review Comment:
   `getDependency(inpOp, ci)` here relies on `ColumnInfo` object identity. For 
PTF/windowing, output column infos for carried-forward input columns are 
created as new `ColumnInfo(inpCInfo)` (see 
`PTFTranslator.buildRowResolverForWindowing`), so this lookup will miss and 
incorrectly fall back to a window-function dependency. Use the index lookup by 
internal name to correctly map pass-through columns, and only use `windowDeps` 
for the actual window-function output columns.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to