aglinxinyuan commented on code in PR #4908:
URL: https://github.com/apache/texera/pull/4908#discussion_r3179893029


##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/aggregate/AggregationOperationSpec.scala:
##########
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.aggregate
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class AggregationOperationSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private def schemaWith(name: String, t: AttributeType): Schema =
+    new Schema(new Attribute(name, t))
+
+  private def tupleOf(name: String, t: AttributeType, value: AnyRef): Tuple =
+    Tuple.builder(schemaWith(name, t)).add(new Attribute(name, t), 
value).build()
+
+  private def op(
+      func: AggregationFunction,
+      attribute: String = "v",
+      resultAttribute: String = "r"
+  ): AggregationOperation = {
+    val o = new AggregationOperation()
+    o.aggFunction = func
+    o.attribute = attribute
+    o.resultAttribute = resultAttribute
+    o
+  }
+
+  // --- getAggregationAttribute 
-----------------------------------------------
+
+  "AggregationOperation.getAggregationAttribute" should "preserve the input 
type for SUM" in {
+    val attr = 
op(AggregationFunction.SUM).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getName == "r")
+    assert(attr.getType == AttributeType.LONG)
+  }
+
+  it should "produce INTEGER for COUNT regardless of input type" in {
+    val attr = 
op(AggregationFunction.COUNT).getAggregationAttribute(AttributeType.STRING)
+    assert(attr.getType == AttributeType.INTEGER)
+  }
+
+  it should "produce DOUBLE for AVERAGE regardless of input type" in {
+    val attr = 
op(AggregationFunction.AVERAGE).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getType == AttributeType.DOUBLE)
+  }
+
+  it should "preserve the input type for MIN and MAX" in {
+    assert(
+      
op(AggregationFunction.MIN).getAggregationAttribute(AttributeType.INTEGER).getType
 ==
+        AttributeType.INTEGER
+    )
+    assert(
+      
op(AggregationFunction.MAX).getAggregationAttribute(AttributeType.TIMESTAMP).getType
 ==
+        AttributeType.TIMESTAMP
+    )
+  }
+
+  it should "produce STRING for CONCAT" in {
+    assert(
+      
op(AggregationFunction.CONCAT).getAggregationAttribute(AttributeType.STRING).getType
 ==
+        AttributeType.STRING
+    )
+  }
+
+  it should "throw RuntimeException when aggFunction is null" in {
+    val ex = intercept[RuntimeException] {
+      op(null).getAggregationAttribute(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: type validation 
-------------------------------------------
+
+  "AggregationOperation.getAggFunc" should "throw for non-numeric types on 
SUM" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(AggregationFunction.SUM).getAggFunc(AttributeType.STRING)
+    }
+    assert(ex.getMessage.contains("Unsupported attribute type for sum"))
+  }
+
+  it should "throw for non-numeric types on MIN and MAX" in {
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MIN).getAggFunc(AttributeType.STRING)
+    }
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MAX).getAggFunc(AttributeType.BOOLEAN)
+    }
+  }
+
+  it should "throw UnsupportedOperationException when aggFunction is null" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(null).getAggFunc(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: SUM behavior 
----------------------------------------------
+
+  "SUM aggregation" should "init at the type's zero, accumulate values, and 
merge partial sums" in {
+    val agg = op(AggregationFunction.SUM).getAggFunc(AttributeType.INTEGER)
+    val zero = agg.init().asInstanceOf[Integer]
+    assert(zero == 0)
+    val t1 = tupleOf("v", AttributeType.INTEGER, Int.box(3))
+    val t2 = tupleOf("v", AttributeType.INTEGER, Int.box(5))
+    val partial = agg.iterate(agg.iterate(zero, t1), t2)
+    assert(partial.asInstanceOf[Integer] == 8)
+    val merged = agg.merge(partial, partial)
+    assert(merged.asInstanceOf[Integer] == 16)
+    assert(agg.finalAgg(merged).asInstanceOf[Integer] == 16)
+  }

Review Comment:
   Done in 07423b3fe9 — agreed; trimmed the spec to remove the tests that 
duplicate AggregateOpSpec. Removed all of the `getAggregationAttribute` tests, 
the per-kind `iterate` tests (SUM/COUNT/AVERAGE/CONCAT), and the `getFinal` 
shape tests. Added a header comment in the spec documenting which behaviors 
AggregateOpSpec already covers, so future contributors do not re-add them. 
Net change: 19 → 7 tests.



##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/aggregate/AggregationOperationSpec.scala:
##########
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.aggregate
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class AggregationOperationSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private def schemaWith(name: String, t: AttributeType): Schema =
+    new Schema(new Attribute(name, t))
+
+  private def tupleOf(name: String, t: AttributeType, value: AnyRef): Tuple =
+    Tuple.builder(schemaWith(name, t)).add(new Attribute(name, t), 
value).build()
+
+  private def op(
+      func: AggregationFunction,
+      attribute: String = "v",
+      resultAttribute: String = "r"
+  ): AggregationOperation = {
+    val o = new AggregationOperation()
+    o.aggFunction = func
+    o.attribute = attribute
+    o.resultAttribute = resultAttribute
+    o
+  }
+
+  // --- getAggregationAttribute 
-----------------------------------------------
+
+  "AggregationOperation.getAggregationAttribute" should "preserve the input 
type for SUM" in {
+    val attr = 
op(AggregationFunction.SUM).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getName == "r")
+    assert(attr.getType == AttributeType.LONG)
+  }
+
+  it should "produce INTEGER for COUNT regardless of input type" in {
+    val attr = 
op(AggregationFunction.COUNT).getAggregationAttribute(AttributeType.STRING)
+    assert(attr.getType == AttributeType.INTEGER)
+  }
+
+  it should "produce DOUBLE for AVERAGE regardless of input type" in {
+    val attr = 
op(AggregationFunction.AVERAGE).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getType == AttributeType.DOUBLE)
+  }
+
+  it should "preserve the input type for MIN and MAX" in {
+    assert(
+      
op(AggregationFunction.MIN).getAggregationAttribute(AttributeType.INTEGER).getType
 ==
+        AttributeType.INTEGER
+    )
+    assert(
+      
op(AggregationFunction.MAX).getAggregationAttribute(AttributeType.TIMESTAMP).getType
 ==
+        AttributeType.TIMESTAMP
+    )
+  }
+
+  it should "produce STRING for CONCAT" in {
+    assert(
+      
op(AggregationFunction.CONCAT).getAggregationAttribute(AttributeType.STRING).getType
 ==
+        AttributeType.STRING
+    )
+  }
+
+  it should "throw RuntimeException when aggFunction is null" in {
+    val ex = intercept[RuntimeException] {
+      op(null).getAggregationAttribute(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: type validation 
-------------------------------------------
+
+  "AggregationOperation.getAggFunc" should "throw for non-numeric types on 
SUM" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(AggregationFunction.SUM).getAggFunc(AttributeType.STRING)
+    }
+    assert(ex.getMessage.contains("Unsupported attribute type for sum"))
+  }
+
+  it should "throw for non-numeric types on MIN and MAX" in {
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MIN).getAggFunc(AttributeType.STRING)
+    }
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MAX).getAggFunc(AttributeType.BOOLEAN)
+    }
+  }
+
+  it should "throw UnsupportedOperationException when aggFunction is null" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(null).getAggFunc(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: SUM behavior 
----------------------------------------------
+
+  "SUM aggregation" should "init at the type's zero, accumulate values, and 
merge partial sums" in {
+    val agg = op(AggregationFunction.SUM).getAggFunc(AttributeType.INTEGER)
+    val zero = agg.init().asInstanceOf[Integer]
+    assert(zero == 0)
+    val t1 = tupleOf("v", AttributeType.INTEGER, Int.box(3))
+    val t2 = tupleOf("v", AttributeType.INTEGER, Int.box(5))
+    val partial = agg.iterate(agg.iterate(zero, t1), t2)
+    assert(partial.asInstanceOf[Integer] == 8)
+    val merged = agg.merge(partial, partial)
+    assert(merged.asInstanceOf[Integer] == 16)
+    assert(agg.finalAgg(merged).asInstanceOf[Integer] == 16)
+  }
+
+  // --- getAggFunc: COUNT behavior 
--------------------------------------------
+
+  "COUNT aggregation" should "treat a null `attribute` as count-all (one per 
tuple)" in {
+    val agg = op(AggregationFunction.COUNT, attribute = 
null).getAggFunc(AttributeType.INTEGER)
+    val t = tupleOf("v", AttributeType.INTEGER, null)
+    val out = agg.iterate(agg.iterate(agg.init(), t), t).asInstanceOf[Integer]
+    assert(out == 2, "with attribute=null, every tuple — even null-valued — 
should count")
+  }
+
+  it should "count only non-null values when `attribute` is set" in {
+    val agg = op(AggregationFunction.COUNT, attribute = 
"v").getAggFunc(AttributeType.INTEGER)
+    val nonNull = tupleOf("v", AttributeType.INTEGER, Int.box(7))
+    val nullVal = tupleOf("v", AttributeType.INTEGER, null)
+    val out = agg
+      .iterate(agg.iterate(agg.iterate(agg.init(), nonNull), nullVal), nonNull)
+      .asInstanceOf[Integer]
+    assert(out == 2, "two non-null tuples + one null → count == 2")
+  }
+
+  // --- getAggFunc: AVERAGE behavior 
------------------------------------------
+
+  "AVERAGE aggregation" should "init at (0,0), accumulate sum+count, and yield 
sum/count" in {
+    // averageAgg() returns DistributedAggregation[AveragePartialObj] but is
+    // type-erased to Object via getAggFunc, so we cast back here.
+    val agg = op(AggregationFunction.AVERAGE).getAggFunc(AttributeType.DOUBLE)
+    val zero = agg.init().asInstanceOf[AveragePartialObj]
+    assert(zero == AveragePartialObj(0, 0))
+
+    val t1 = tupleOf("v", AttributeType.DOUBLE, java.lang.Double.valueOf(2.0))
+    val t2 = tupleOf("v", AttributeType.DOUBLE, java.lang.Double.valueOf(4.0))
+    val acc = agg
+      .iterate(agg.iterate(zero, t1), t2)
+      .asInstanceOf[AveragePartialObj]
+    assert(acc == AveragePartialObj(6.0, 2))
+    val finalVal = agg.finalAgg(acc).asInstanceOf[java.lang.Double]
+    assert(finalVal == 3.0)
+  }
+
+  it should "yield null when no non-null values were aggregated" in {
+    val agg = op(AggregationFunction.AVERAGE).getAggFunc(AttributeType.DOUBLE)
+    val zero = agg.init()
+    val finalVal = agg.finalAgg(zero)
+    assert(finalVal == null)
+  }
+
+  // --- getAggFunc: CONCAT behavior 
-------------------------------------------
+
+  "CONCAT aggregation" should "concatenate non-empty values with commas and 
skip null gracefully" in {
+    val agg = op(AggregationFunction.CONCAT).getAggFunc(AttributeType.STRING)
+    assert(agg.init() == "")
+    val t1 = tupleOf("v", AttributeType.STRING, "a")
+    val t2 = tupleOf("v", AttributeType.STRING, "b")
+    val tNull = tupleOf("v", AttributeType.STRING, null)
+    val out =
+      agg.iterate(agg.iterate(agg.iterate(agg.init(), t1), tNull), t2)
+    assert(out == "a,,b", "null values are emitted as empty between commas")
+  }
+
+  it should "merge two non-empty partial strings with a comma" in {
+    val agg = op(AggregationFunction.CONCAT).getAggFunc(AttributeType.STRING)
+    assert(agg.merge("foo", "bar") == "foo,bar")
+    assert(agg.merge("", "bar") == "bar")
+    assert(agg.merge("foo", "") == "foo")
+    assert(agg.merge("", "") == "")
+  }
+
+  // --- getFinal 
--------------------------------------------------------------

Review Comment:
   Done in 07423b3fe9 — added two end-to-end pipeline tests under `"Worker → 
final aggregation pipeline"`: each runs a real worker partial aggregation 
(COUNT and SUM), emits a partial output tuple, then re-aggregates the partials 
through the operation produced by `getFinal`, asserting that the two-stage 
result equals a single-pass aggregation.



##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/aggregate/AggregationOperationSpec.scala:
##########
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.aggregate
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class AggregationOperationSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private def schemaWith(name: String, t: AttributeType): Schema =
+    new Schema(new Attribute(name, t))
+
+  private def tupleOf(name: String, t: AttributeType, value: AnyRef): Tuple =
+    Tuple.builder(schemaWith(name, t)).add(new Attribute(name, t), 
value).build()
+
+  private def op(
+      func: AggregationFunction,
+      attribute: String = "v",
+      resultAttribute: String = "r"
+  ): AggregationOperation = {
+    val o = new AggregationOperation()
+    o.aggFunction = func
+    o.attribute = attribute
+    o.resultAttribute = resultAttribute
+    o
+  }

Review Comment:
   Done in 07423b3fe9 — agreed. Removed the tests that duplicate 
AggregateOpSpec (the `getAggregationAttribute` tests, the per-kind `iterate` 
tests, and the `getFinal` shape tests) and added a coverage-notes header in 
the spec documenting what AggregateOpSpec already covers, so the two suites 
no longer overlap.



##########
common/workflow-operator/src/test/scala/org/apache/texera/amber/operator/aggregate/AggregationOperationSpec.scala:
##########
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.amber.operator.aggregate
+
+import org.apache.texera.amber.core.tuple.{Attribute, AttributeType, Schema, 
Tuple}
+import org.scalatest.flatspec.AnyFlatSpec
+
+class AggregationOperationSpec extends AnyFlatSpec {
+
+  // --- helpers 
---------------------------------------------------------------
+
+  private def schemaWith(name: String, t: AttributeType): Schema =
+    new Schema(new Attribute(name, t))
+
+  private def tupleOf(name: String, t: AttributeType, value: AnyRef): Tuple =
+    Tuple.builder(schemaWith(name, t)).add(new Attribute(name, t), 
value).build()
+
+  private def op(
+      func: AggregationFunction,
+      attribute: String = "v",
+      resultAttribute: String = "r"
+  ): AggregationOperation = {
+    val o = new AggregationOperation()
+    o.aggFunction = func
+    o.attribute = attribute
+    o.resultAttribute = resultAttribute
+    o
+  }
+
+  // --- getAggregationAttribute 
-----------------------------------------------
+
+  "AggregationOperation.getAggregationAttribute" should "preserve the input 
type for SUM" in {
+    val attr = 
op(AggregationFunction.SUM).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getName == "r")
+    assert(attr.getType == AttributeType.LONG)
+  }
+
+  it should "produce INTEGER for COUNT regardless of input type" in {
+    val attr = 
op(AggregationFunction.COUNT).getAggregationAttribute(AttributeType.STRING)
+    assert(attr.getType == AttributeType.INTEGER)
+  }
+
+  it should "produce DOUBLE for AVERAGE regardless of input type" in {
+    val attr = 
op(AggregationFunction.AVERAGE).getAggregationAttribute(AttributeType.LONG)
+    assert(attr.getType == AttributeType.DOUBLE)
+  }
+
+  it should "preserve the input type for MIN and MAX" in {
+    assert(
+      
op(AggregationFunction.MIN).getAggregationAttribute(AttributeType.INTEGER).getType
 ==
+        AttributeType.INTEGER
+    )
+    assert(
+      
op(AggregationFunction.MAX).getAggregationAttribute(AttributeType.TIMESTAMP).getType
 ==
+        AttributeType.TIMESTAMP
+    )
+  }
+
+  it should "produce STRING for CONCAT" in {
+    assert(
+      
op(AggregationFunction.CONCAT).getAggregationAttribute(AttributeType.STRING).getType
 ==
+        AttributeType.STRING
+    )
+  }
+
+  it should "throw RuntimeException when aggFunction is null" in {
+    val ex = intercept[RuntimeException] {
+      op(null).getAggregationAttribute(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: type validation 
-------------------------------------------
+
+  "AggregationOperation.getAggFunc" should "throw for non-numeric types on 
SUM" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(AggregationFunction.SUM).getAggFunc(AttributeType.STRING)
+    }
+    assert(ex.getMessage.contains("Unsupported attribute type for sum"))
+  }
+
+  it should "throw for non-numeric types on MIN and MAX" in {
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MIN).getAggFunc(AttributeType.STRING)
+    }
+    intercept[UnsupportedOperationException] {
+      op(AggregationFunction.MAX).getAggFunc(AttributeType.BOOLEAN)
+    }
+  }
+
+  it should "throw UnsupportedOperationException when aggFunction is null" in {
+    val ex = intercept[UnsupportedOperationException] {
+      op(null).getAggFunc(AttributeType.INTEGER)
+    }
+    assert(ex.getMessage.contains("Unknown aggregation function"))
+  }
+
+  // --- getAggFunc: SUM behavior 
----------------------------------------------
+
+  "SUM aggregation" should "init at the type's zero, accumulate values, and 
merge partial sums" in {
+    val agg = op(AggregationFunction.SUM).getAggFunc(AttributeType.INTEGER)
+    val zero = agg.init().asInstanceOf[Integer]
+    assert(zero == 0)
+    val t1 = tupleOf("v", AttributeType.INTEGER, Int.box(3))
+    val t2 = tupleOf("v", AttributeType.INTEGER, Int.box(5))
+    val partial = agg.iterate(agg.iterate(zero, t1), t2)
+    assert(partial.asInstanceOf[Integer] == 8)
+    val merged = agg.merge(partial, partial)
+    assert(merged.asInstanceOf[Integer] == 16)
+    assert(agg.finalAgg(merged).asInstanceOf[Integer] == 16)
+  }
+
+  // --- getAggFunc: COUNT behavior 
--------------------------------------------
+
+  "COUNT aggregation" should "treat a null `attribute` as count-all (one per 
tuple)" in {
+    val agg = op(AggregationFunction.COUNT, attribute = 
null).getAggFunc(AttributeType.INTEGER)
+    val t = tupleOf("v", AttributeType.INTEGER, null)
+    val out = agg.iterate(agg.iterate(agg.init(), t), t).asInstanceOf[Integer]
+    assert(out == 2, "with attribute=null, every tuple — even null-valued — 
should count")
+  }
+
+  it should "count only non-null values when `attribute` is set" in {
+    val agg = op(AggregationFunction.COUNT, attribute = 
"v").getAggFunc(AttributeType.INTEGER)
+    val nonNull = tupleOf("v", AttributeType.INTEGER, Int.box(7))
+    val nullVal = tupleOf("v", AttributeType.INTEGER, null)
+    val out = agg
+      .iterate(agg.iterate(agg.iterate(agg.init(), nonNull), nullVal), nonNull)
+      .asInstanceOf[Integer]
+    assert(out == 2, "two non-null tuples + one null → count == 2")
+  }
+
+  // --- getAggFunc: AVERAGE behavior 
------------------------------------------
+
+  "AVERAGE aggregation" should "init at (0,0), accumulate sum+count, and yield 
sum/count" in {
+    // averageAgg() returns DistributedAggregation[AveragePartialObj] but is
+    // type-erased to Object via getAggFunc, so we cast back here.
+    val agg = op(AggregationFunction.AVERAGE).getAggFunc(AttributeType.DOUBLE)
+    val zero = agg.init().asInstanceOf[AveragePartialObj]
+    assert(zero == AveragePartialObj(0, 0))
+
+    val t1 = tupleOf("v", AttributeType.DOUBLE, java.lang.Double.valueOf(2.0))
+    val t2 = tupleOf("v", AttributeType.DOUBLE, java.lang.Double.valueOf(4.0))
+    val acc = agg
+      .iterate(agg.iterate(zero, t1), t2)
+      .asInstanceOf[AveragePartialObj]
+    assert(acc == AveragePartialObj(6.0, 2))
+    val finalVal = agg.finalAgg(acc).asInstanceOf[java.lang.Double]
+    assert(finalVal == 3.0)
+  }
+
+  it should "yield null when no non-null values were aggregated" in {
+    val agg = op(AggregationFunction.AVERAGE).getAggFunc(AttributeType.DOUBLE)
+    val zero = agg.init()
+    val finalVal = agg.finalAgg(zero)
+    assert(finalVal == null)
+  }
+
+  // --- getAggFunc: CONCAT behavior 
-------------------------------------------
+
+  "CONCAT aggregation" should "concatenate non-empty values with commas and 
skip null gracefully" in {
+    val agg = op(AggregationFunction.CONCAT).getAggFunc(AttributeType.STRING)
+    assert(agg.init() == "")
+    val t1 = tupleOf("v", AttributeType.STRING, "a")
+    val t2 = tupleOf("v", AttributeType.STRING, "b")
+    val tNull = tupleOf("v", AttributeType.STRING, null)
+    val out =
+      agg.iterate(agg.iterate(agg.iterate(agg.init(), t1), tNull), t2)
+    assert(out == "a,,b", "null values are emitted as empty between commas")
+  }

Review Comment:
   Done in 07423b3fe9 — the misleading CONCAT iterate test was removed 
altogether (its iterate behavior is already covered by `AggregateOpSpec`'s 
CONCAT case). What remains in this spec is a CONCAT *merge* test that uses an 
unambiguous description: `"join two non-empty partials with a comma and 
short-circuit when either is empty"`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to