icexelloss commented on code in PR #34311: URL: https://github.com/apache/arrow/pull/34311#discussion_r1123681376
##########
cpp/src/arrow/compute/exec/aggregate_node.cc:
##########
@@ -326,46 +446,86 @@ class ScalarAggregateNode : public ExecNode, public TracedNode {
   }
 
  private:
-  Status Finish() {
-    auto scope = TraceFinish();
+  Status ReconstructAggregates() {
+    const auto& input_schema = *inputs()[0]->output_schema();
+    auto exec_ctx = plan()->query_context()->exec_context();
+    for (size_t i = 0; i < kernels_.size(); ++i) {
+      std::vector<TypeHolder> in_types;
+      for (const auto& target : target_fieldsets_[i]) {
+        in_types.emplace_back(input_schema.field(target)->type().get());
+      }
+      states_[i].resize(plan()->query_context()->max_concurrency());
+      KernelContext kernel_ctx{exec_ctx};
+      RETURN_NOT_OK(Kernel::InitAll(
+          &kernel_ctx, KernelInitArgs{kernels_[i], in_types, aggs_[i].options.get()},
+          &states_[i]));
+    }
+    return Status::OK();
+  }
+
+  Status OutputResult(bool is_last = false, bool traced = false) {
+    if (is_last && !traced) {
+      auto scope = TraceFinish();
+      return OutputResult(is_last, /*traced=*/true);
+    }
+    GatedUniqueLock lock(gated_shared_mutex_);
     ExecBatch batch{{}, 1};
-    batch.values.resize(kernels_.size());
+    batch.values.resize(kernels_.size() + segment_field_ids_.size());
 
     for (size_t i = 0; i < kernels_.size(); ++i) {
       util::tracing::Span span;
       START_COMPUTE_SPAN(span, aggs_[i].function,
                          {{"function.name", aggs_[i].function},
                           {"function.options",
                            aggs_[i].options ? aggs_[i].options->ToString() : "<NULLPTR>"},
-                          {"function.kind", std::string(kind_name()) + "::Finalize"}});
+                          {"function.kind", std::string(kind_name()) + "::Output"}});
       KernelContext ctx{plan()->query_context()->exec_context()};
       ARROW_ASSIGN_OR_RAISE(auto merged, ScalarAggregateKernel::MergeAll(
                                              kernels_[i], &ctx, std::move(states_[i])));
       RETURN_NOT_OK(kernels_[i]->finalize(&ctx, &batch.values[i]));
     }
+    PlaceFields(batch, kernels_.size(), segmenter_values_);
 
-    return output_->InputReceived(this, std::move(batch));
+    ARROW_RETURN_NOT_OK(output_->InputReceived(this, std::move(batch)));
+    total_output_batches_++;

Review Comment:
   Can you document this somewhere? It wasn't obvious until I read this line.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org