pitrou commented on a change in pull request #11793:
URL: https://github.com/apache/arrow/pull/11793#discussion_r763906775



##########
File path: cpp/src/arrow/compute/kernels/scalar_compare.cc
##########
@@ -439,6 +472,224 @@ struct ScalarMinMax {
   }
 };
 
+template <typename Type, typename Op>
+Status ExecBinaryMinMaxScalar(KernelContext* ctx,
+                              const ElementWiseAggregateOptions& options,
+                              const ExecBatch& batch, Datum* out) {
+  if (batch.values.empty()) {
+    return Status::OK();
+  }
+  auto output = checked_cast<BaseBinaryScalar*>(out->scalar().get());
+  if (!options.skip_nulls) {
+    // any nulls in the input will produce a null output
+    for (const auto& value : batch.values) {
+      if (!value.scalar()->is_valid) {
+        output->is_valid = false;
+        return Status::OK();
+      }
+    }
+  }
+  const auto& first_scalar = *batch.values.front().scalar();
+  string_view result = UnboxScalar<Type>::Unbox(first_scalar);
+  bool valid = first_scalar.is_valid;
+  for (size_t i = 1; i < batch.values.size(); i++) {
+    const auto& scalar = *batch[i].scalar();
+    if (!scalar.is_valid) {
+      DCHECK(options.skip_nulls);
+      continue;
+    } else {
+      string_view value = UnboxScalar<Type>::Unbox(scalar);
+      result = !valid ? value : Op::Call(result, value);
+      valid = true;
+    }
+  }
+  if (valid) {
+    ARROW_ASSIGN_OR_RAISE(output->value, ctx->Allocate(result.size()));
+    std::copy(result.begin(), result.end(), output->value->mutable_data());
+    output->is_valid = true;
+  } else {
+    output->is_valid = false;
+  }
+  return Status::OK();
+}
+
+template <typename Type, typename Op>
+struct BinaryScalarMinMax {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+  using offset_type = typename Type::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
+    if (std::all_of(batch.values.begin(), batch.values.end(),
+                    [](const Datum& d) { return d.is_scalar(); })) {
+      return ExecBinaryMinMaxScalar<Type, Op>(ctx, options, batch, out);
+    }
+    return ExecContainingArrays(ctx, options, batch, out);
+  }
+
+  static Status ExecContainingArrays(KernelContext* ctx,
+                                     const ElementWiseAggregateOptions& options,
+                                     const ExecBatch& batch, Datum* out) {
+    // Presize data to avoid reallocations, using an upper bound estimation of final size.
+    int64_t estimated_final_size = EstimateOutputSize(batch);
+    BuilderType builder(ctx->memory_pool());
+    RETURN_NOT_OK(builder.Reserve(batch.length));
+    RETURN_NOT_OK(builder.ReserveData(estimated_final_size));
+
+    for (int64_t row = 0; row < batch.length; row++) {
+      util::optional<string_view> result;
+      auto visit_value = [&](string_view value) {
+        result = !result ? value : Op::Call(*result, value);
+      };
+
+      for (size_t col = 0; col < batch.values.size(); col++) {
+        if (batch[col].is_scalar()) {
+          const auto& scalar = *batch[col].scalar();
+          if (scalar.is_valid) {
+            visit_value(UnboxScalar<Type>::Unbox(scalar));
+          } else if (!options.skip_nulls) {
+            result = util::nullopt;
+            break;
+          }
+        } else {
+          const auto& array = *batch[col].array();
+          if (!array.MayHaveNulls() ||
+              bit_util::GetBit(array.buffers[0]->data(), array.offset + row)) {
+            const auto offsets = array.GetValues<offset_type>(1);
+            const auto data = array.GetValues<uint8_t>(2, /*absolute_offset=*/0);
+            const int64_t length = offsets[row + 1] - offsets[row];
+            visit_value(
+                string_view(reinterpret_cast<const char*>(data + offsets[row]), length));
+          } else if (!options.skip_nulls) {
+            result = util::nullopt;
+            break;
+          }
+        }
+      }
+
+      if (result) {
+        builder.Append(*result);
+      } else {
+        builder.AppendNull();

Review comment:
       For the record, `builder.UnsafeAppendNull` is ok here, as the null 
bitmap is presized to exactly the right size using `builder.Reserve`. It's the 
string data reservation part (`builder.ReserveData`) that is not an upper bound.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to