jorisvandenbossche commented on a change in pull request #9758:
URL: https://github.com/apache/arrow/pull/9758#discussion_r615715447



##########
File path: cpp/src/arrow/compute/api_aggregate.h
##########
@@ -37,43 +37,17 @@ class ExecContext;
 // ----------------------------------------------------------------------
 // Aggregate functions
 
-/// \addtogroup compute-concrete-options
-/// @{
-
-/// \brief Control Count kernel behavior
-///
-/// By default, all non-null values are counted.
-struct ARROW_EXPORT CountOptions : public FunctionOptions {
-  enum Mode {
-    /// Count all non-null values.
-    COUNT_NON_NULL = 0,
-    /// Count all null values.
-    COUNT_NULL,
-  };
-
-  explicit CountOptions(enum Mode count_mode = COUNT_NON_NULL) : 
count_mode(count_mode) {}
-
-  static CountOptions Defaults() { return CountOptions(COUNT_NON_NULL); }
-
-  enum Mode count_mode;
-};
-
-/// \brief Control MinMax kernel behavior
+/// \brief Control general scalar aggregate kernel behavior
 ///
 /// By default, null values are ignored
-struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
-  enum Mode {
-    /// Skip null values
-    SKIP = 0,
-    /// Any nulls will result in null output
-    EMIT_NULL
-  };
+struct ARROW_EXPORT ScalarAggregateOptions : public FunctionOptions {
+  explicit ScalarAggregateOptions(bool skip_nulls = true, uint32_t min_count = 
1)
+      : skip_nulls(skip_nulls), min_count(min_count) {}
 
-  explicit MinMaxOptions(enum Mode null_handling = SKIP) : 
null_handling(null_handling) {}
+  static ScalarAggregateOptions Defaults() { return ScalarAggregateOptions{}; }
 
-  static MinMaxOptions Defaults() { return MinMaxOptions{}; }
-
-  enum Mode null_handling;
+  bool skip_nulls = true;
+  uint32_t min_count = 1;

Review comment:
       On the other hand, for mean/min/max, a `min_count=1` might be a more 
sensible default, to not get NaN/Inf/-Inf for empty/all-null arrays.
   
   (anyway, I don't have a strong opinion on the default, and it probably 
greatly depends on your exact use case what you find the best default. So as 
long as the option is there and clearly documented, that should be fine)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to