felipecrv commented on code in PR #15083: URL: https://github.com/apache/arrow/pull/15083#discussion_r1072793234
########## cpp/src/arrow/compute/exec/aggregate.cc: ########## @@ -121,27 +127,48 @@ Result<Datum> GroupBy(const std::vector<Datum>& arguments, const std::vector<Dat ExecSpanIterator argument_iterator; ExecBatch args_batch; - if (!arguments.empty()) { - ARROW_ASSIGN_OR_RAISE(args_batch, ExecBatch::Make(arguments)); + std::optional<int64_t> inferred_length = ExecBatch::InferLength(arguments); + if (!inferred_length.has_value()) { + inferred_length = ExecBatch::InferLength(keys); + } + DCHECK(inferred_length.has_value()); + const int64_t length = inferred_length.value(); + if (!aggregates.empty()) { + ARROW_ASSIGN_OR_RAISE(args_batch, ExecBatch::Make(arguments, length)); // Construct and initialize HashAggregateKernels - auto argument_types = args_batch.GetTypes(); + std::vector<std::vector<TypeHolder>> aggs_argument_types(aggregates.size()); + { + // Contains the flattened list of aggregate arguments. We use the size of + // each Aggregate::target to re-group the aggregate argument types. + auto argument_types = args_batch.GetTypes(); + size_t i = 0; + for (size_t j = 0; j < aggregates.size(); j++) { + const size_t num_agg_args = aggregates[j].target.size(); + for (size_t k = 0; k < num_agg_args && i < argument_types.size(); k++, i++) { Review Comment: Not strictly needed, but perhaps good to have, to make it obvious that the access never goes past the end of the array? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org