This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 3af6b398 fix: eliminate per-element heap allocs in is_in kernel for
binary types (#745)
3af6b398 is described below
commit 3af6b3984fde2bfb18de5c2fd6eeacd11c613bff
Author: Matt Topol <[email protected]>
AuthorDate: Tue Apr 7 12:55:28 2026 -0400
fix: eliminate per-element heap allocs in is_in kernel for binary types
(#745)
## Summary
- Add `BinaryMemoTable.ExistsDirect` that inlines the hash table probe
loop, avoiding the closure in `HashTable.Lookup` that causes `val
[]byte` to escape to the heap
- Add `isInBinaryDirect` specialized kernel path that bypasses the
`visitBinary` → `VisitBitBlocksShort` closure chain by directly
iterating with `OptionalBitBlockCounter`
- Route `BinaryDataType` dispatch in `DispatchIsIn` to the new direct
path (handles both int32 and int64 offsets)
## Motivation
The `is_in` kernel for binary types allocated once per input element
because the `[]byte` value escaped to the heap through a closure chain:
1. `visitBinary` slices `rawBytes[offsets[pos]:offsets[pos+1]]` and
passes to a callback
2. The callback calls `BinaryMemoTable.Exists(v)`
3. `Exists` calls `lookup` which creates a closure capturing `val`
4. The closure is passed to `HashTable.Lookup`, causing escape analysis
to move `val` to the heap
Closes #736
## Benchmark (100k rows, 10-element value set)
| Metric | Before | After | Improvement |
|--------|--------|-------|-------------|
| ns/op | 4,133,679 | 923,565 | **4.5x faster** |
| B/op | 2,435,327 | 33,092 | **73x less memory** |
| allocs/op | 100,075 | 70 | **1,430x fewer allocs** |
All existing `TestIsInBinary` subtests pass (binary, large\_binary,
utf8, large\_utf8 × all null matching behaviors).
---
.../compute/internal/kernels/scalar_set_lookup.go | 99 +++++++++++++++++++++-
arrow/compute/scalar_set_lookup_test.go | 41 +++++++++
internal/hashing/xxh3_memo_table.go | 26 ++++++
3 files changed, 165 insertions(+), 1 deletion(-)
diff --git a/arrow/compute/internal/kernels/scalar_set_lookup.go
b/arrow/compute/internal/kernels/scalar_set_lookup.go
index c3562675..fe9841ac 100644
--- a/arrow/compute/internal/kernels/scalar_set_lookup.go
+++ b/arrow/compute/internal/kernels/scalar_set_lookup.go
@@ -235,7 +235,12 @@ func DispatchIsIn(state lookupState, in *exec.ArraySpan,
out *exec.ExecResult) e
switch ty := inType.(type) {
case arrow.BinaryDataType:
- return isInKernelExec(state.(*SetLookupState[[]byte]), in, out)
+ switch ty.Layout().Buffers[1].ByteWidth {
+ case 8:
+ return
isInBinaryDirect[int64](state.(*SetLookupState[[]byte]), in, out)
+ default:
+ return
isInBinaryDirect[int32](state.(*SetLookupState[[]byte]), in, out)
+ }
case arrow.FixedWidthDataType:
switch ty.Bytes() {
case 1:
@@ -254,6 +259,98 @@ func DispatchIsIn(state lookupState, in *exec.ArraySpan,
out *exec.ExecResult) e
}
}
+// isInBinaryDirect is a specialized is_in path for binary/string types
+// that avoids the nested closure chain (isInKernelExec -> visitBinary ->
+// VisitBitBlocksShort) which causes per-element heap allocations due to
+// closure escape analysis. It inlines the bit block iteration and calls
+// BinaryMemoTable.ExistsDirect which also avoids closure-based lookup.
+func isInBinaryDirect[OffsetT int32 | int64](state *SetLookupState[[]byte], in
*exec.ArraySpan, out *exec.ExecResult) error {
+ if in.Len == 0 {
+ return nil
+ }
+
+ writerBool := bitutil.NewBitmapWriter(out.Buffers[1].Buf,
int(out.Offset), int(out.Len))
+ defer writerBool.Finish()
+ writerNulls := bitutil.NewBitmapWriter(out.Buffers[0].Buf,
int(out.Offset), int(out.Len))
+ defer writerNulls.Finish()
+
+ valueSetHasNull := state.NullIndex != -1
+ rawBytes := in.Buffers[2].Buf
+ offsets := exec.GetSpanOffsets[OffsetT](in, 1)
+ lookup := state.Lookup.(*hashing.BinaryMemoTable)
+
+ bitmap := in.Buffers[0].Buf
+ counter := bitutils.NewOptionalBitBlockCounter(bitmap, in.Offset,
in.Len)
+ pos := int64(0)
+ for pos < in.Len {
+ block := counter.NextBlock()
+ if block.AllSet() {
+ for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+ val := rawBytes[offsets[pos]:offsets[pos+1]]
+ if lookup.ExistsDirect(val) {
+ writerBool.Set()
+ writerNulls.Set()
+ } else if state.NullBehavior ==
NullMatchingInconclusive && valueSetHasNull {
+ writerBool.Clear()
+ writerNulls.Clear()
+ } else {
+ writerBool.Clear()
+ writerNulls.Set()
+ }
+ writerBool.Next()
+ writerNulls.Next()
+ }
+ } else if block.NoneSet() {
+ for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+ switch {
+ case state.NullBehavior == NullMatchingMatch &&
valueSetHasNull:
+ writerBool.Set()
+ writerNulls.Set()
+ case state.NullBehavior == NullMatchingSkip ||
(!valueSetHasNull && state.NullBehavior == NullMatchingMatch):
+ writerBool.Clear()
+ writerNulls.Set()
+ default:
+ writerBool.Clear()
+ writerNulls.Clear()
+ }
+ writerBool.Next()
+ writerNulls.Next()
+ }
+ } else {
+ for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+ if bitutil.BitIsSet(bitmap, int(in.Offset+pos))
{
+ val :=
rawBytes[offsets[pos]:offsets[pos+1]]
+ if lookup.ExistsDirect(val) {
+ writerBool.Set()
+ writerNulls.Set()
+ } else if state.NullBehavior ==
NullMatchingInconclusive && valueSetHasNull {
+ writerBool.Clear()
+ writerNulls.Clear()
+ } else {
+ writerBool.Clear()
+ writerNulls.Set()
+ }
+ } else {
+ switch {
+ case state.NullBehavior ==
NullMatchingMatch && valueSetHasNull:
+ writerBool.Set()
+ writerNulls.Set()
+ case state.NullBehavior ==
NullMatchingSkip || (!valueSetHasNull && state.NullBehavior ==
NullMatchingMatch):
+ writerBool.Clear()
+ writerNulls.Set()
+ default:
+ writerBool.Clear()
+ writerNulls.Clear()
+ }
+ }
+ writerBool.Next()
+ writerNulls.Next()
+ }
+ }
+ }
+ return nil
+}
+
func isInKernelExec[T hashing.MemoTypes](state *SetLookupState[T], in
*exec.ArraySpan, out *exec.ExecResult) error {
writerBool := bitutil.NewBitmapWriter(out.Buffers[1].Buf,
int(out.Offset), int(out.Len))
defer writerBool.Finish()
diff --git a/arrow/compute/scalar_set_lookup_test.go
b/arrow/compute/scalar_set_lookup_test.go
index 58a666bf..b28c3d75 100644
--- a/arrow/compute/scalar_set_lookup_test.go
+++ b/arrow/compute/scalar_set_lookup_test.go
@@ -18,6 +18,7 @@ package compute_test
import (
"context"
+ "fmt"
"strings"
"sync"
"testing"
@@ -639,3 +640,43 @@ func (ss *ScalarSetLookupSuite) TearDownTest() {
func TestScalarSetLookup(t *testing.T) {
suite.Run(t, new(ScalarSetLookupSuite))
}
+
+func BenchmarkIsInBinary(b *testing.B) {
+ const numRows = 100_000
+ const valueSetSize = 10
+
+ mem := memory.DefaultAllocator
+ ctx := compute.WithAllocator(context.TODO(), mem)
+
+ bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer bldr.Release()
+ for i := range numRows {
+ v := []byte(fmt.Sprintf("value-%08d", i%1000))
+ bldr.Append(v)
+ }
+ input := bldr.NewArray()
+ defer input.Release()
+
+ vsBldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+ defer vsBldr.Release()
+ for i := range valueSetSize {
+ v := []byte(fmt.Sprintf("value-%08d", i))
+ vsBldr.Append(v)
+ }
+ valueSet := vsBldr.NewArray()
+ defer valueSet.Release()
+
+ opts := compute.SetOptions{
+ ValueSet: compute.NewDatumWithoutOwning(valueSet),
+ }
+
+ b.ResetTimer()
+ b.ReportAllocs()
+ for range b.N {
+ result, err := compute.IsIn(ctx, opts,
compute.NewDatumWithoutOwning(input))
+ if err != nil {
+ b.Fatal(err)
+ }
+ result.Release()
+ }
+}
diff --git a/internal/hashing/xxh3_memo_table.go
b/internal/hashing/xxh3_memo_table.go
index f0ca131a..26b4ce64 100644
--- a/internal/hashing/xxh3_memo_table.go
+++ b/internal/hashing/xxh3_memo_table.go
@@ -225,6 +225,32 @@ func (b *BinaryMemoTable) Exists(val []byte) bool {
return ok
}
+// ExistsDirect checks if val exists in the table by inlining the hash
+// table probe loop directly. This avoids the closure allocation that
+// occurs in the normal Exists -> lookup -> HashTable.Lookup path,
+// where the comparison closure captures val and causes it to escape
+// to the heap.
+func (b *BinaryMemoTable) ExistsDirect(val []byte) bool {
+ const perturbShift uint8 = 5
+
+ h := Hash(val, 0)
+ v := b.tbl.fixHash(h)
+ idx := v & b.tbl.capMask
+ perturb := (v >> uint64(perturbShift)) + 1
+
+ for {
+ e := &b.tbl.entries[idx]
+ if e.h == v && bytes.Equal(val,
b.builder.Value(int(e.payload.val))) {
+ return true
+ }
+ if e.h == sentinel {
+ return false
+ }
+ idx = (idx + perturb) & b.tbl.capMask
+ perturb = (perturb >> uint64(perturbShift)) + 1
+ }
+}
+
// Get returns the index of the specified value in the table or KeyNotFound,
// and a boolean indicating whether it was found in the table.
func (b *BinaryMemoTable) Get(val interface{}) (int, bool) {