asolimando commented on code in PR #4544:
URL: https://github.com/apache/calcite/pull/4544#discussion_r2356684935


##########
core/src/test/java/org/apache/calcite/test/RelBuilderTest.java:
##########
@@ -5585,6 +5590,284 @@ private static RelNode 
buildCorrelateWithJoin(JoinRelType type, RelBuilder build
     assertThat(root, hasTree(expected));
   }
 
+
+  @Test void testCombine() {
+    // Create two separate queries
+    final RelBuilder builder = RelBuilder.create(config().build());
+    RelNode scan1 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .project(builder.field("ENAME"))
+        .build();
+    RelNode scan2 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(20)))
+        .project(builder.field("SAL"))
+        .build();
+
+    // Test combine with varargs
+    RelNode combine1 = builder.combine(scan1, scan2).build();
+    assertThat(combine1, instanceOf(Combine.class));
+    assertThat(combine1.getInputs(), hasSize(2));
+
+    // Test combine with Iterable
+    RelNode combine2 = builder.combine(Arrays.asList(scan1, scan2)).build();
+    assertThat(combine2, instanceOf(Combine.class));
+    assertThat(combine2.getInputs(), hasSize(2));
+
+    // Test combine with stack
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine3 = builder.combine(2).build();
+    assertThat(combine3, instanceOf(Combine.class));
+    assertThat(((Combine) combine3).getInputs(), hasSize(2));
+
+    // Test combine with all stack
+    builder.clear();
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine4 = builder.combine().build();
+    assertThat(combine4, instanceOf(Combine.class));
+    assertThat(combine4.getInputs(), hasSize(2));
+  }
+
+  @Test void testCombineWithSharedSubexpression() {
+    // Test that demonstrates how Combine can optimize queries with shared 
subexpressions
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create a common subexpression - employees with salary > 1000
+    builder.scan("EMP")
+        .filter(
+            builder.call(SqlStdOperatorTable.GREATER_THAN,
+            builder.field("SAL"), builder.literal(1000)));
+    RelNode commonSubexpr = builder.build();
+
+    // Query 1: Count high earners by department
+    RelNode query1 = builder.push(commonSubexpr)
+        .aggregate(builder.groupKey("DEPTNO"), builder.count())
+        .build();
+
+    // Query 2: Get names of high earners
+    RelNode query2 = builder.push(commonSubexpr)
+        .project(builder.field("ENAME"), builder.field("SAL"))
+        .build();
+
+    // Combine both queries - planner can recognize shared subexpression
+    RelNode combined = builder.combine(query1, query2).build();
+
+    assertThat(combined, instanceOf(Combine.class));
+    assertThat(combined.getInputs(), hasSize(2));
+    // Both queries share the same filter as their base
+    assertThat(combined.getInputs().get(0).getInput(0), 
instanceOf(Filter.class));
+    assertThat(combined.getInputs().get(1).getInput(0), 
instanceOf(Filter.class));
+  }
+
+  @Test void testCombineDifferentRowTypes() {
+    // Test that Combine doesn't require inputs to have the same row type 
(unlike Union)
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Query 1: Returns (ENAME: VARCHAR)
+    RelNode query1 = builder.scan("EMP")
+        .project(builder.field("ENAME"))
+        .build();
+
+    // Query 2: Returns (DEPTNO: INTEGER, AVG_SAL: DECIMAL)
+    RelNode query2 = builder.scan("EMP")
+        .aggregate(builder.groupKey("DEPTNO"),
+            builder.avg(false, "AVG_SAL", builder.field("SAL")))
+        .build();
+
+    // Query 3: Returns just count (COUNT: BIGINT)
+    RelNode query3 = builder.scan("DEPT")
+        .aggregate(builder.groupKey(), builder.count())
+        .build();
+
+    // Combine can handle different row types
+    RelNode combined = builder.combine(query1, query2, query3).build();
+
+    assertThat(combined, instanceOf(Combine.class));
+    assertThat(combined.getInputs(), hasSize(3));
+    // Verify each has different row type
+    assertThat(combined.getInputs().get(0).getRowType().getFieldCount(), 
is(1));
+    assertThat(combined.getInputs().get(1).getRowType().getFieldCount(), 
is(2));
+    assertThat(combined.getInputs().get(2).getRowType().getFieldCount(), 
is(1));
+  }
+
+  @Test void testCombineMetadata() {
+    // Test metadata methods for Combine operator
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create queries with known characteristics
+    RelNode query1 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .build();
+
+    RelNode query2 = builder.scan("DEPT")
+        .project(builder.field("DNAME"))
+        .build();
+
+    RelNode combined = builder.combine(query1, query2).build();
+
+    // Test that Combine has proper metadata
+    assertThat(combined, instanceOf(Combine.class));
+
+    // Get metadata query

Review Comment:
   Nit: I find your comments great, but this one in particular doesn't add much 
and could be removed IMO



##########
core/src/test/java/org/apache/calcite/test/RelBuilderTest.java:
##########
@@ -5585,6 +5590,284 @@ private static RelNode 
buildCorrelateWithJoin(JoinRelType type, RelBuilder build
     assertThat(root, hasTree(expected));
   }
 
+
+  @Test void testCombine() {
+    // Create two separate queries
+    final RelBuilder builder = RelBuilder.create(config().build());
+    RelNode scan1 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .project(builder.field("ENAME"))
+        .build();
+    RelNode scan2 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(20)))
+        .project(builder.field("SAL"))
+        .build();
+
+    // Test combine with varargs
+    RelNode combine1 = builder.combine(scan1, scan2).build();
+    assertThat(combine1, instanceOf(Combine.class));
+    assertThat(combine1.getInputs(), hasSize(2));
+
+    // Test combine with Iterable
+    RelNode combine2 = builder.combine(Arrays.asList(scan1, scan2)).build();
+    assertThat(combine2, instanceOf(Combine.class));
+    assertThat(combine2.getInputs(), hasSize(2));
+
+    // Test combine with stack
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine3 = builder.combine(2).build();
+    assertThat(combine3, instanceOf(Combine.class));
+    assertThat(((Combine) combine3).getInputs(), hasSize(2));
+
+    // Test combine with all stack
+    builder.clear();
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine4 = builder.combine().build();
+    assertThat(combine4, instanceOf(Combine.class));
+    assertThat(combine4.getInputs(), hasSize(2));
+  }
+
+  @Test void testCombineWithSharedSubexpression() {
+    // Test that demonstrates how Combine can optimize queries with shared 
subexpressions
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create a common subexpression - employees with salary > 1000
+    builder.scan("EMP")
+        .filter(
+            builder.call(SqlStdOperatorTable.GREATER_THAN,
+            builder.field("SAL"), builder.literal(1000)));
+    RelNode commonSubexpr = builder.build();
+
+    // Query 1: Count high earners by department
+    RelNode query1 = builder.push(commonSubexpr)
+        .aggregate(builder.groupKey("DEPTNO"), builder.count())
+        .build();
+
+    // Query 2: Get names of high earners
+    RelNode query2 = builder.push(commonSubexpr)
+        .project(builder.field("ENAME"), builder.field("SAL"))
+        .build();
+
+    // Combine both queries - planner can recognize shared subexpression
+    RelNode combined = builder.combine(query1, query2).build();
+
+    assertThat(combined, instanceOf(Combine.class));
+    assertThat(combined.getInputs(), hasSize(2));
+    // Both queries share the same filter as their base
+    assertThat(combined.getInputs().get(0).getInput(0), 
instanceOf(Filter.class));
+    assertThat(combined.getInputs().get(1).getInput(0), 
instanceOf(Filter.class));
+  }
+
+  @Test void testCombineDifferentRowTypes() {
+    // Test that Combine doesn't require inputs to have the same row type 
(unlike Union)
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Query 1: Returns (ENAME: VARCHAR)
+    RelNode query1 = builder.scan("EMP")
+        .project(builder.field("ENAME"))
+        .build();
+
+    // Query 2: Returns (DEPTNO: INTEGER, AVG_SAL: DECIMAL)
+    RelNode query2 = builder.scan("EMP")
+        .aggregate(builder.groupKey("DEPTNO"),
+            builder.avg(false, "AVG_SAL", builder.field("SAL")))
+        .build();
+
+    // Query 3: Returns just count (COUNT: BIGINT)
+    RelNode query3 = builder.scan("DEPT")
+        .aggregate(builder.groupKey(), builder.count())
+        .build();
+
+    // Combine can handle different row types
+    RelNode combined = builder.combine(query1, query2, query3).build();
+
+    assertThat(combined, instanceOf(Combine.class));
+    assertThat(combined.getInputs(), hasSize(3));
+    // Verify each has different row type
+    assertThat(combined.getInputs().get(0).getRowType().getFieldCount(), 
is(1));
+    assertThat(combined.getInputs().get(1).getRowType().getFieldCount(), 
is(2));
+    assertThat(combined.getInputs().get(2).getRowType().getFieldCount(), 
is(1));
+  }
+
+  @Test void testCombineMetadata() {
+    // Test metadata methods for Combine operator
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create queries with known characteristics
+    RelNode query1 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .build();
+
+    RelNode query2 = builder.scan("DEPT")
+        .project(builder.field("DNAME"))
+        .build();
+
+    RelNode combined = builder.combine(query1, query2).build();
+
+    // Test that Combine has proper metadata
+    assertThat(combined, instanceOf(Combine.class));
+
+    // Get metadata query
+    RelMetadataQuery mq = combined.getCluster().getMetadataQuery();
+
+    // Test row count - should be sum of inputs
+    Double rowCount = mq.getRowCount(combined);
+    assertThat(rowCount, notNullValue());
+
+    // Test cost computation
+    RelOptCost selfCost =
+        combined.computeSelfCost(combined.getCluster().getPlanner(), mq);
+    assertThat(selfCost, notNullValue());
+    assertThat(selfCost.getCpu(), greaterThan(0.0));
+
+    // Test cumulative cost includes all inputs
+    RelOptCost cumulativeCost = mq.getCumulativeCost(combined);
+    assertThat(cumulativeCost, notNullValue());
+
+    // Cumulative cost should be greater than self cost
+    assertThat(cumulativeCost.isLt(selfCost), is(false));
+  }
+
+  @Test void testCombineCumulativeCost() {
+    // Comprehensive test for getCumulativeCost with Combine operator
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create three queries with different complexities
+    // Query 1: Simple scan
+    RelNode scan = builder.scan("EMP").build();
+
+    // Query 2: Scan with filter
+    RelNode filtered = builder.scan("EMP")
+        .filter(
+            builder.call(SqlStdOperatorTable.GREATER_THAN,
+            builder.field("SAL"), builder.literal(1000)))
+        .build();
+
+    // Query 3: Scan with aggregation
+    RelNode aggregated = builder.scan("EMP")
+        .aggregate(builder.groupKey("DEPTNO"),
+            builder.count(false, "CNT"),
+            builder.sum(false, "SUM_SAL", builder.field("SAL")))
+        .build();
+
+    // Create Combine with all three queries
+    RelNode combined = builder.combine(scan, filtered, aggregated).build();
+
+    RelMetadataQuery mq = combined.getCluster().getMetadataQuery();
+
+    // Get individual cumulative costs
+    RelOptCost scanCost = mq.getCumulativeCost(scan);
+    RelOptCost filteredCost = mq.getCumulativeCost(filtered);
+    RelOptCost aggregatedCost = mq.getCumulativeCost(aggregated);
+
+    assertThat(scanCost, notNullValue());
+    assertThat(filteredCost, notNullValue());
+    assertThat(aggregatedCost, notNullValue());
+
+    // Get Combine's costs
+    RelOptCost combineSelfCost =
+        combined.computeSelfCost(combined.getCluster().getPlanner(), mq);
+    RelOptCost combineCumulativeCost = mq.getCumulativeCost(combined);
+
+    assertThat(combineSelfCost, notNullValue());
+    assertThat(combineCumulativeCost, notNullValue());
+
+    // Verify cumulative cost is sum of all input costs plus self cost
+    RelOptCost expectedTotal = combineSelfCost
+        .plus(scanCost)
+        .plus(filteredCost)
+        .plus(aggregatedCost);
+
+    // The cumulative cost should equal the sum
+    assertThat(combineCumulativeCost.getRows(),
+        is(expectedTotal.getRows()));
+    assertThat(combineCumulativeCost.getCpu(),
+        is(expectedTotal.getCpu()));
+    assertThat(combineCumulativeCost.getIo(),
+        is(expectedTotal.getIo()));
+
+    // Verify relationships between costs
+    // Filtered should cost more than simple scan
+    assertThat(filteredCost.isLt(scanCost), is(false));
+
+    // Aggregated should cost more than simple scan
+    assertThat(aggregatedCost.isLt(scanCost), is(false));
+
+    // Combined cumulative cost should be greater than any individual cost
+    assertThat(combineCumulativeCost.isLt(scanCost), is(false));
+    assertThat(combineCumulativeCost.isLt(filteredCost), is(false));
+    assertThat(combineCumulativeCost.isLt(aggregatedCost), is(false));
+  }
+
+  @Test void testCombineCumulativeCostWithSharedInputs() {
+    // Test cumulative cost when Combine has shared subexpressions
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create a base scan that will be shared
+    RelNode baseScan = builder.scan("EMP").build();
+
+    // Create two queries that use the same base scan
+    RelNode query1 = builder.push(baseScan)
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .project(builder.field("ENAME"))
+        .build();
+
+    RelNode query2 = builder.push(baseScan)
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(20)))
+        .aggregate(builder.groupKey(), builder.count())
+        .build();
+
+    // Combine the queries
+    RelNode combined = builder.combine(query1, query2).build();
+
+    RelMetadataQuery mq = combined.getCluster().getMetadataQuery();
+
+    // Get costs
+    RelOptCost query1Cost = mq.getCumulativeCost(query1);
+    RelOptCost query2Cost = mq.getCumulativeCost(query2);
+    RelOptCost combinedCost = mq.getCumulativeCost(combined);
+
+    assertThat(query1Cost, notNullValue());
+    assertThat(query2Cost, notNullValue());
+    assertThat(combinedCost, notNullValue());
+
+    // Even with shared base scan, Combine's cumulative cost

Review Comment:
   Q: is the cost of the shared subtree duplicated here? Should it be?
   
   I think the answer to the aforementioned questions should emerge clearly 
from the test



##########
core/src/test/java/org/apache/calcite/test/RelBuilderTest.java:
##########
@@ -5585,6 +5590,284 @@ private static RelNode 
buildCorrelateWithJoin(JoinRelType type, RelBuilder build
     assertThat(root, hasTree(expected));
   }
 
+
+  @Test void testCombine() {
+    // Create two separate queries
+    final RelBuilder builder = RelBuilder.create(config().build());
+    RelNode scan1 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(10)))
+        .project(builder.field("ENAME"))
+        .build();
+    RelNode scan2 = builder.scan("EMP")
+        .filter(builder.equals(builder.field("DEPTNO"), builder.literal(20)))
+        .project(builder.field("SAL"))
+        .build();
+
+    // Test combine with varargs
+    RelNode combine1 = builder.combine(scan1, scan2).build();
+    assertThat(combine1, instanceOf(Combine.class));
+    assertThat(combine1.getInputs(), hasSize(2));
+
+    // Test combine with Iterable
+    RelNode combine2 = builder.combine(Arrays.asList(scan1, scan2)).build();
+    assertThat(combine2, instanceOf(Combine.class));
+    assertThat(combine2.getInputs(), hasSize(2));
+
+    // Test combine with stack
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine3 = builder.combine(2).build();
+    assertThat(combine3, instanceOf(Combine.class));
+    assertThat(((Combine) combine3).getInputs(), hasSize(2));
+
+    // Test combine with all stack
+    builder.clear();
+    builder.push(scan1)
+        .push(scan2);
+    RelNode combine4 = builder.combine().build();
+    assertThat(combine4, instanceOf(Combine.class));
+    assertThat(combine4.getInputs(), hasSize(2));
+  }
+
+  @Test void testCombineWithSharedSubexpression() {
+    // Test that demonstrates how Combine can optimize queries with shared 
subexpressions
+    final RelBuilder builder = RelBuilder.create(config().build());
+
+    // Create a common subexpression - employees with salary > 1000
+    builder.scan("EMP")
+        .filter(
+            builder.call(SqlStdOperatorTable.GREATER_THAN,
+            builder.field("SAL"), builder.literal(1000)));
+    RelNode commonSubexpr = builder.build();
+
+    // Query 1: Count high earners by department
+    RelNode query1 = builder.push(commonSubexpr)
+        .aggregate(builder.groupKey("DEPTNO"), builder.count())
+        .build();
+
+    // Query 2: Get names of high earners
+    RelNode query2 = builder.push(commonSubexpr)
+        .project(builder.field("ENAME"), builder.field("SAL"))
+        .build();
+
+    // Combine both queries - planner can recognize shared subexpression
+    RelNode combined = builder.combine(query1, query2).build();
+
+    assertThat(combined, instanceOf(Combine.class));
+    assertThat(combined.getInputs(), hasSize(2));
+    // Both queries share the same filter as their base
+    assertThat(combined.getInputs().get(0).getInput(0), 
instanceOf(Filter.class));
+    assertThat(combined.getInputs().get(1).getInput(0), 
instanceOf(Filter.class));
+  }
+
+  @Test void testCombineDifferentRowTypes() {
+    // Test that Combine doesn't require inputs to have the same row type 
(unlike Union)

Review Comment:
   Not: technically union requires row types to unify to the same row type, 
element-wise, not having the same type, can you fix this? I know you know, but 
people tend to get confused by this kind of comments



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to