This is an automated email from the ASF dual-hosted git repository.
pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d11916f154 GH-46856: [C++][Python] Add binary view comparison kernels
(#49964)
d11916f154 is described below
commit d11916f1543f0c9847f83a6d06e2a0bf0f8d5d8a
Author: Roman Kvasnytskyi <[email protected]>
AuthorDate: Tue Jun 9 16:16:01 2026 +0200
GH-46856: [C++][Python] Add binary view comparison kernels (#49964)
### Rationale for this change
`pyarrow.compute.equal` fails for `pa.binary_view()` arrays because C++
compute has no registered comparison kernel for `(binary_view, binary_view)`.
This fixes that missing kernel path and also enables the same comparisons
for `utf8_view`.
### What changes are included in this PR?
This adds comparison kernel support for `binary_view` and `utf8_view`.
The following functions now work for same-type inputs:
- `equal`
- `not_equal`
- `greater`
- `greater_equal`
- `less`
- `less_equal`
### Are these changes tested?
Added C++ tests covering:
- inline and out-of-line values
- nulls
- sliced arrays
- array-array comparisons
- array-scalar and scalar-array comparisons
- all six comparison functions
Added Python regression tests for `pa.binary_view()` and
`pa.string_view()`.
Verified the same cases fail before this patch at
`a0d2885b101acb439f7f79ec2237028974e74e64` with `ArrowNotImplementedError: no
kernel matching input types`.
### Are there any user-facing changes?
`pyarrow.compute` comparison functions now work for `pa.binary_view()` and
`pa.string_view()` arrays where they previously failed with a missing kernel
error.
### AI Usage
Tests were generated by LLM agents, along with part of the PR summary.
Addresses: GH-46856
Partially addresses: GH-44336
* GitHub Issue: #46856
Authored-by: Roman Kvasnytskyi <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/kernels/codegen_internal.h | 16 +++++++
cpp/src/arrow/compute/kernels/scalar_compare.cc | 6 +++
.../arrow/compute/kernels/scalar_compare_test.cc | 49 ++++++++++++++++++++++
3 files changed, 71 insertions(+)
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h
b/cpp/src/arrow/compute/kernels/codegen_internal.h
index 15a946fbdb..d33ffacb2f 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -350,6 +350,22 @@ struct ArrayIterator<Type, enable_if_base_binary<Type>> {
}
};
+template <typename Type>
+struct ArrayIterator<Type, enable_if_binary_view_like<Type>> {
+ const BinaryViewType::c_type* views;
+ const std::shared_ptr<Buffer>* data_buffers;
+ int64_t position;
+
+ explicit ArrayIterator(const ArraySpan& arr)
+ : views(arr.GetValues<BinaryViewType::c_type>(1)),
+ data_buffers(arr.GetVariadicBuffers().data()),
+ position(0) {}
+
+ std::string_view operator()() {
+ return util::FromBinaryView(views[position++], data_buffers);
+ }
+};
+
template <>
struct ArrayIterator<FixedSizeBinaryType> {
const ArraySpan& arr;
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare.cc
b/cpp/src/arrow/compute/kernels/scalar_compare.cc
index 773a3f684b..3dfd66655e 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare.cc
@@ -433,6 +433,12 @@ std::shared_ptr<ScalarFunction>
MakeCompareFunction(std::string name, FunctionDo
GenerateVarBinaryBase<applicator::ScalarBinaryEqualTypes, BooleanType,
Op>(*ty);
DCHECK_OK(func->AddKernel({ty, ty}, boolean(), std::move(exec)));
}
+ for (const auto& ty : BinaryViewTypes()) {
+ auto exec =
+ GenerateVarBinaryViewBase<applicator::ScalarBinaryEqualTypes,
BooleanType, Op>(
+ *ty);
+ DCHECK_OK(func->AddKernel({ty, ty}, boolean(), std::move(exec)));
+ }
for (const auto id : {Type::DECIMAL128, Type::DECIMAL256}) {
auto exec = GenerateDecimal<applicator::ScalarBinaryEqualTypes,
BooleanType, Op>(id);
diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
index 23c7ab21bd..2aae5bf2ee 100644
--- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc
@@ -1196,6 +1196,55 @@ TEST_F(TestStringCompareKernel, RandomCompareArrayArray)
{
}
}
+TEST(TestBinaryViewCompareKernel, ArrayArray) {
+ const auto cases = std::vector<std::shared_ptr<DataType>>{binary_view(),
utf8_view()};
+ const auto expected = std::vector<std::pair<std::string, std::string>>{
+ {"equal", "[true, false, false, false, false, false, null]"},
+ {"not_equal", "[false, true, true, true, true, true, null]"},
+ {"greater", "[false, false, false, false, false, true, null]"},
+ {"greater_equal", "[true, false, false, false, false, true, null]"},
+ {"less", "[false, true, true, true, true, false, null]"},
+ {"less_equal", "[true, true, true, true, true, false, null]"}};
+
+ for (const auto& ty : cases) {
+ auto lhs =
+ ArrayFromJSON(ty, R"(["", "abc", "abcdefghijkl", "abcdefghijklm",
"prefix_same_A",
+ "samepref_size", null])");
+ auto rhs = ArrayFromJSON(
+ ty, R"(["", "abd", "abcdefghijklm", "abcdefghijklz", "prefix_same_B",
+ "samepref", null])");
+
+ CheckScalarBinary("equal", ArrayFromJSON(ty, R"([])"), ArrayFromJSON(ty,
R"([])"),
+ ArrayFromJSON(boolean(), R"([])"));
+ CheckScalarBinary("equal", ArrayFromJSON(ty, R"([null])"),
+ ArrayFromJSON(ty, R"([null])"),
+ ArrayFromJSON(boolean(), R"([null])"));
+ for (const auto& function_and_expected : expected) {
+ CheckScalarBinary(function_and_expected.first, lhs, rhs,
+ ArrayFromJSON(boolean(),
function_and_expected.second));
+ }
+ }
+}
+
+TEST(TestBinaryViewCompareKernel, ArrayScalar) {
+ for (const auto& ty : {binary_view(), utf8_view()}) {
+ auto arr = ArrayFromJSON(ty, R"(["", "abc", "abcdefghijklmnop", null])");
+ auto scalar = ScalarFromJSON(ty, R"("abc")");
+ auto null_scalar = ScalarFromJSON(ty, "null");
+
+ CheckScalarBinary("equal", arr, scalar,
+ ArrayFromJSON(boolean(), R"([false, true, false,
null])"));
+ CheckScalarBinary("equal", scalar, arr,
+ ArrayFromJSON(boolean(), R"([false, true, false,
null])"));
+ CheckScalarBinary("greater", arr, scalar,
+ ArrayFromJSON(boolean(), R"([false, false, true,
null])"));
+ CheckScalarBinary("less", scalar, arr,
+ ArrayFromJSON(boolean(), R"([false, false, true,
null])"));
+ CheckScalarBinary("equal", arr, null_scalar,
+ ArrayFromJSON(boolean(), R"([null, null, null, null])"));
+ }
+}
+
template <typename T>
class TestVarArgsCompare : public ::testing::Test {
protected: