This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 3af6b398 fix: eliminate per-element heap allocs in is_in kernel for 
binary types (#745)
3af6b398 is described below

commit 3af6b3984fde2bfb18de5c2fd6eeacd11c613bff
Author: Matt Topol <[email protected]>
AuthorDate: Tue Apr 7 12:55:28 2026 -0400

    fix: eliminate per-element heap allocs in is_in kernel for binary types 
(#745)
    
    ## Summary
    
    - Add `BinaryMemoTable.ExistsDirect` that inlines the hash table probe
    loop, avoiding the closure in `HashTable.Lookup` that causes `val
    []byte` to escape to the heap
    - Add `isInBinaryDirect` specialized kernel path that bypasses the
    `visitBinary` → `VisitBitBlocksShort` closure chain by directly
    iterating with `OptionalBitBlockCounter`
    - Route `BinaryDataType` dispatch in `DispatchIsIn` to the new direct
    path (handles both int32 and int64 offsets)
    
    ## Motivation
    
    The `is_in` kernel for binary types allocated once per input element
    because the `[]byte` value escaped to the heap through a closure chain:
    
    1. `visitBinary` slices `rawBytes[offsets[pos]:offsets[pos+1]]` and
    passes to a callback
    2. The callback calls `BinaryMemoTable.Exists(v)`
    3. `Exists` calls `lookup` which creates a closure capturing `val`
    4. The closure is passed to `HashTable.Lookup`, causing escape analysis
    to move `val` to the heap
    
    Closes #736
    
    ## Benchmark (100k rows, 10-element value set)
    
    | Metric | Before | After | Improvement |
    |--------|--------|-------|-------------|
    | ns/op | 4,133,679 | 923,565 | **4.5x faster** |
    | B/op | 2,435,327 | 33,092 | **73x less memory** |
    | allocs/op | 100,075 | 70 | **1,430x fewer allocs** |
    
    All existing `TestIsInBinary` subtests pass (binary, large\_binary,
    utf8, large\_utf8 × all null matching behaviors).
---
 .../compute/internal/kernels/scalar_set_lookup.go  | 99 +++++++++++++++++++++-
 arrow/compute/scalar_set_lookup_test.go            | 41 +++++++++
 internal/hashing/xxh3_memo_table.go                | 26 ++++++
 3 files changed, 165 insertions(+), 1 deletion(-)

diff --git a/arrow/compute/internal/kernels/scalar_set_lookup.go 
b/arrow/compute/internal/kernels/scalar_set_lookup.go
index c3562675..fe9841ac 100644
--- a/arrow/compute/internal/kernels/scalar_set_lookup.go
+++ b/arrow/compute/internal/kernels/scalar_set_lookup.go
@@ -235,7 +235,12 @@ func DispatchIsIn(state lookupState, in *exec.ArraySpan, 
out *exec.ExecResult) e
 
        switch ty := inType.(type) {
        case arrow.BinaryDataType:
-               return isInKernelExec(state.(*SetLookupState[[]byte]), in, out)
+               switch ty.Layout().Buffers[1].ByteWidth {
+               case 8:
+                       return 
isInBinaryDirect[int64](state.(*SetLookupState[[]byte]), in, out)
+               default:
+                       return 
isInBinaryDirect[int32](state.(*SetLookupState[[]byte]), in, out)
+               }
        case arrow.FixedWidthDataType:
                switch ty.Bytes() {
                case 1:
@@ -254,6 +259,98 @@ func DispatchIsIn(state lookupState, in *exec.ArraySpan, 
out *exec.ExecResult) e
        }
 }
 
+// isInBinaryDirect is a specialized is_in path for binary/string types
+// that avoids the nested closure chain (isInKernelExec -> visitBinary ->
+// VisitBitBlocksShort) which causes per-element heap allocations due to
+// closure escape analysis. It inlines the bit block iteration and calls
+// BinaryMemoTable.ExistsDirect which also avoids closure-based lookup.
+func isInBinaryDirect[OffsetT int32 | int64](state *SetLookupState[[]byte], in 
*exec.ArraySpan, out *exec.ExecResult) error {
+       if in.Len == 0 {
+               return nil
+       }
+
+       writerBool := bitutil.NewBitmapWriter(out.Buffers[1].Buf, 
int(out.Offset), int(out.Len))
+       defer writerBool.Finish()
+       writerNulls := bitutil.NewBitmapWriter(out.Buffers[0].Buf, 
int(out.Offset), int(out.Len))
+       defer writerNulls.Finish()
+
+       valueSetHasNull := state.NullIndex != -1
+       rawBytes := in.Buffers[2].Buf
+       offsets := exec.GetSpanOffsets[OffsetT](in, 1)
+       lookup := state.Lookup.(*hashing.BinaryMemoTable)
+
+       bitmap := in.Buffers[0].Buf
+       counter := bitutils.NewOptionalBitBlockCounter(bitmap, in.Offset, 
in.Len)
+       pos := int64(0)
+       for pos < in.Len {
+               block := counter.NextBlock()
+               if block.AllSet() {
+                       for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+                               val := rawBytes[offsets[pos]:offsets[pos+1]]
+                               if lookup.ExistsDirect(val) {
+                                       writerBool.Set()
+                                       writerNulls.Set()
+                               } else if state.NullBehavior == 
NullMatchingInconclusive && valueSetHasNull {
+                                       writerBool.Clear()
+                                       writerNulls.Clear()
+                               } else {
+                                       writerBool.Clear()
+                                       writerNulls.Set()
+                               }
+                               writerBool.Next()
+                               writerNulls.Next()
+                       }
+               } else if block.NoneSet() {
+                       for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+                               switch {
+                               case state.NullBehavior == NullMatchingMatch && 
valueSetHasNull:
+                                       writerBool.Set()
+                                       writerNulls.Set()
+                               case state.NullBehavior == NullMatchingSkip || 
(!valueSetHasNull && state.NullBehavior == NullMatchingMatch):
+                                       writerBool.Clear()
+                                       writerNulls.Set()
+                               default:
+                                       writerBool.Clear()
+                                       writerNulls.Clear()
+                               }
+                               writerBool.Next()
+                               writerNulls.Next()
+                       }
+               } else {
+                       for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
+                               if bitutil.BitIsSet(bitmap, int(in.Offset+pos)) 
{
+                                       val := 
rawBytes[offsets[pos]:offsets[pos+1]]
+                                       if lookup.ExistsDirect(val) {
+                                               writerBool.Set()
+                                               writerNulls.Set()
+                                       } else if state.NullBehavior == 
NullMatchingInconclusive && valueSetHasNull {
+                                               writerBool.Clear()
+                                               writerNulls.Clear()
+                                       } else {
+                                               writerBool.Clear()
+                                               writerNulls.Set()
+                                       }
+                               } else {
+                                       switch {
+                                       case state.NullBehavior == 
NullMatchingMatch && valueSetHasNull:
+                                               writerBool.Set()
+                                               writerNulls.Set()
+                                       case state.NullBehavior == 
NullMatchingSkip || (!valueSetHasNull && state.NullBehavior == 
NullMatchingMatch):
+                                               writerBool.Clear()
+                                               writerNulls.Set()
+                                       default:
+                                               writerBool.Clear()
+                                               writerNulls.Clear()
+                                       }
+                               }
+                               writerBool.Next()
+                               writerNulls.Next()
+                       }
+               }
+       }
+       return nil
+}
+
 func isInKernelExec[T hashing.MemoTypes](state *SetLookupState[T], in 
*exec.ArraySpan, out *exec.ExecResult) error {
        writerBool := bitutil.NewBitmapWriter(out.Buffers[1].Buf, 
int(out.Offset), int(out.Len))
        defer writerBool.Finish()
diff --git a/arrow/compute/scalar_set_lookup_test.go 
b/arrow/compute/scalar_set_lookup_test.go
index 58a666bf..b28c3d75 100644
--- a/arrow/compute/scalar_set_lookup_test.go
+++ b/arrow/compute/scalar_set_lookup_test.go
@@ -18,6 +18,7 @@ package compute_test
 
 import (
        "context"
+       "fmt"
        "strings"
        "sync"
        "testing"
@@ -639,3 +640,43 @@ func (ss *ScalarSetLookupSuite) TearDownTest() {
 func TestScalarSetLookup(t *testing.T) {
        suite.Run(t, new(ScalarSetLookupSuite))
 }
+
+func BenchmarkIsInBinary(b *testing.B) {
+       const numRows = 100_000
+       const valueSetSize = 10
+
+       mem := memory.DefaultAllocator
+       ctx := compute.WithAllocator(context.TODO(), mem)
+
+       bldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+       defer bldr.Release()
+       for i := range numRows {
+               v := []byte(fmt.Sprintf("value-%08d", i%1000))
+               bldr.Append(v)
+       }
+       input := bldr.NewArray()
+       defer input.Release()
+
+       vsBldr := array.NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
+       defer vsBldr.Release()
+       for i := range valueSetSize {
+               v := []byte(fmt.Sprintf("value-%08d", i))
+               vsBldr.Append(v)
+       }
+       valueSet := vsBldr.NewArray()
+       defer valueSet.Release()
+
+       opts := compute.SetOptions{
+               ValueSet: compute.NewDatumWithoutOwning(valueSet),
+       }
+
+       b.ResetTimer()
+       b.ReportAllocs()
+       for range b.N {
+               result, err := compute.IsIn(ctx, opts, 
compute.NewDatumWithoutOwning(input))
+               if err != nil {
+                       b.Fatal(err)
+               }
+               result.Release()
+       }
+}
diff --git a/internal/hashing/xxh3_memo_table.go 
b/internal/hashing/xxh3_memo_table.go
index f0ca131a..26b4ce64 100644
--- a/internal/hashing/xxh3_memo_table.go
+++ b/internal/hashing/xxh3_memo_table.go
@@ -225,6 +225,32 @@ func (b *BinaryMemoTable) Exists(val []byte) bool {
        return ok
 }
 
+// ExistsDirect checks if val exists in the table by inlining the hash
+// table probe loop directly. This avoids the closure allocation that
+// occurs in the normal Exists -> lookup -> HashTable.Lookup path,
+// where the comparison closure captures val and causes it to escape
+// to the heap.
+func (b *BinaryMemoTable) ExistsDirect(val []byte) bool {
+       const perturbShift uint8 = 5
+
+       h := Hash(val, 0)
+       v := b.tbl.fixHash(h)
+       idx := v & b.tbl.capMask
+       perturb := (v >> uint64(perturbShift)) + 1
+
+       for {
+               e := &b.tbl.entries[idx]
+               if e.h == v && bytes.Equal(val, 
b.builder.Value(int(e.payload.val))) {
+                       return true
+               }
+               if e.h == sentinel {
+                       return false
+               }
+               idx = (idx + perturb) & b.tbl.capMask
+               perturb = (perturb >> uint64(perturbShift)) + 1
+       }
+}
+
 // Get returns the index of the specified value in the table or KeyNotFound,
 // and a boolean indicating whether it was found in the table.
 func (b *BinaryMemoTable) Get(val interface{}) (int, bool) {

Reply via email to