Yicong-Huang commented on code in PR #5896:
URL: https://github.com/apache/texera/pull/5896#discussion_r3463951067


##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -55,17 +55,33 @@ case class AveragePartialObj(sum: Double, count: Double) 
extends Serializable {}
         }
       ]
     }
-  }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "aggFunction": { "const": "count(*)" }
+        }
+      },
+      "then": {},
+      "else": {
+        "required": ["attribute"],
+        "properties": {
+          "attribute": { "minLength": 1 }
+        }
+      }
+    }
+  ]
 }
 """)
 class AggregationOperation {
   @JsonProperty(required = true)
   @JsonSchemaTitle("Aggregate Func")
-  @JsonPropertyDescription("sum, count, average, min, max, or concat")
+  @JsonPropertyDescription("sum, count, count(*), average, min, max, or 
concat")

Review Comment:
   let's remove it. I think `count` already include `count(*)` on the semantic. 



##########
frontend/src/app/workspace/component/property-editor/operator-property-edit-frame/operator-property-edit-frame.component.ts:
##########
@@ -545,6 +545,20 @@ export class OperatorPropertyEditFrameComponent implements 
OnInit, OnChanges, On
         mappedField.type = "datasetversionselector";
       }
 
+      // Aggregate: the attribute is required for every function except 
count(*), which counts all rows.
+      // For count(*) the attribute is irrelevant, so disable the field 
(keeping the row layout consistent),
+      // drop the required marker, and clear any previously-selected column so 
it isn't shown greyed-out as if used.
+      // All react to the sibling aggFunction within the same row.
+      if (this.currentOperatorSchema?.operatorType === "Aggregate" && 
mappedField.key === "attribute") {
+        mappedField.expressions = {
+          ...mappedField.expressions,
+          "props.required": (field: FormlyFieldConfig) => 
field.parent?.model?.aggFunction !== "count(*)",
+          "props.disabled": (field: FormlyFieldConfig) => 
field.parent?.model?.aggFunction === "count(*)",
+          "model.attribute": (field: FormlyFieldConfig) =>
+            field.parent?.model?.aggFunction === "count(*)" ? "" : 
field.formControl?.value,

Review Comment:
   better to make `"count(*)"` a constant like `export const COUNT_STAR = 
"count(*)"` and reuse it, also in test.



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -77,26 +93,28 @@ class AggregationOperation {
   @JsonIgnore
   def getAggregationAttribute(attrType: AttributeType): Attribute = {
     val resultAttrType = this.aggFunction match {
-      case AggregationFunction.SUM     => attrType
-      case AggregationFunction.COUNT   => AttributeType.INTEGER
-      case AggregationFunction.AVERAGE => AttributeType.DOUBLE
-      case AggregationFunction.MIN     => attrType
-      case AggregationFunction.MAX     => attrType
-      case AggregationFunction.CONCAT  => AttributeType.STRING
-      case _                           => throw new RuntimeException("Unknown 
aggregation function: " + this.aggFunction)
+      case AggregationFunction.SUM        => attrType
+      case AggregationFunction.COUNT      => AttributeType.INTEGER
+      case AggregationFunction.COUNT_STAR => AttributeType.INTEGER
+      case AggregationFunction.AVERAGE    => AttributeType.DOUBLE
+      case AggregationFunction.MIN        => attrType
+      case AggregationFunction.MAX        => attrType
+      case AggregationFunction.CONCAT     => AttributeType.STRING
+      case _                              => throw new 
RuntimeException("Unknown aggregation function: " + this.aggFunction)
     }
     new Attribute(resultAttribute, resultAttrType)
   }
 
   @JsonIgnore
   def getAggFunc(attrType: AttributeType): DistributedAggregation[Object] = {
     val aggFunc = aggFunction match {
-      case AggregationFunction.AVERAGE => averageAgg()
-      case AggregationFunction.COUNT   => countAgg()
-      case AggregationFunction.MAX     => maxAgg(attrType)
-      case AggregationFunction.MIN     => minAgg(attrType)
-      case AggregationFunction.SUM     => sumAgg(attrType)
-      case AggregationFunction.CONCAT  => concatAgg()
+      case AggregationFunction.AVERAGE    => averageAgg()
+      case AggregationFunction.COUNT      => countAgg()
+      case AggregationFunction.COUNT_STAR => countStarAgg()

Review Comment:
   on the frontend we made it special `count(*)`, but for backend, we could 
make it reuse the `COUNT` function? can we avoid intorducing a new `COUNT_STAR` 
and `countStarAGG()`, which could just be `countAgg(’*‘)?



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationOperation.scala:
##########
@@ -55,17 +55,33 @@ case class AveragePartialObj(sum: Double, count: Double) 
extends Serializable {}
         }
       ]
     }
-  }
+  },
+  "allOf": [
+    {
+      "if": {
+        "properties": {
+          "aggFunction": { "const": "count(*)" }
+        }
+      },
+      "then": {},
+      "else": {
+        "required": ["attribute"],
+        "properties": {
+          "attribute": { "minLength": 1 }
+        }
+      }
+    }
+  ]
 }
 """)
 class AggregationOperation {
   @JsonProperty(required = true)
   @JsonSchemaTitle("Aggregate Func")
-  @JsonPropertyDescription("sum, count, average, min, max, or concat")
+  @JsonPropertyDescription("sum, count, count(*), average, min, max, or 
concat")
   var aggFunction: AggregationFunction = _
 
-  @JsonProperty(value = "attribute", required = true)
-  @JsonPropertyDescription("column to calculate average value")
+  @JsonProperty(value = "attribute")
+  @JsonPropertyDescription("column to aggregate on")

Review Comment:
   good change



##########
common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/aggregate/AggregationFunction.java:
##########
@@ -27,6 +27,8 @@ public enum AggregationFunction {
 
     COUNT("count"),
 
+    COUNT_STAR("count(*)"),

Review Comment:
   having two names could be confusing. no need to change it if separate `*` to 
value input box is too hard.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to