This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new e0df4f91 `UnsafeAppendBoolToBitmap` for dictionary and REE builders 
(#758)
e0df4f91 is described below

commit e0df4f91b5d00f647eaffd875ef40ffe159f8838
Author: Lucas Valente <[email protected]>
AuthorDate: Tue Apr 14 20:10:11 2026 +0200

    `UnsafeAppendBoolToBitmap` for dictionary and REE builders (#758)
    
    ### Rationale for this change
    
    For more context:
    https://github.com/apache/arrow-go/pull/558/changes#r2758798540
    
    The Builders for REE and dict-encoded arrays were inheriting the
    `UnsafeAppendBoolToBitmap()` and `UnsafeAppend()` from the `Builder`
    interface defined in `arrow/array/builder.go`. The default
    implementation does not bump the length of the inner `idxBuilder` or
    `runEndsBuilder`, which causes it to be in an inconsistent state where
    the parent has greater length than the index/run-ends builder. This
    causes a panic when instancing a record batch.
    
    ### What changes are included in this PR?
    
    This PR makes `UnsafeAppendBoolToBitmap()` for `RunEndEncodedBuilder`
    panic as there is not a good semantic meaning for this method in case of
    REE.
    
    It also implements `UnsafeAppendBoolToBitmap()` and `UnsafeAppend()` for
    the Dictionary builders. This allows users to use `bldr.Reserve(n)`
    followed by `n` calls to `UnsafeAppend()` or
    `UnsafeAppendBoolToBitmap()` without the need to check if the array
    needs to grow everytime.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No. The API was already there (inherited from `Builder`), but the
    behavior was incorrect.
    
    ---------
    
    Co-authored-by: Matt Topol <[email protected]>
---
 arrow/array/dictionary.go      | 93 +++++++++++++++++++++++++++++++++++++++++-
 arrow/array/dictionary_test.go | 41 +++++++++++++++++++
 arrow/array/encoded.go         |  4 ++
 3 files changed, 137 insertions(+), 1 deletion(-)

diff --git a/arrow/array/dictionary.go b/arrow/array/dictionary.go
index 109d2a97..8f9fe245 100644
--- a/arrow/array/dictionary.go
+++ b/arrow/array/dictionary.go
@@ -314,7 +314,8 @@ func arrayApproxEqualDict(l, r *Dictionary, opt 
equalOption) bool {
 // helper for building the properly typed indices of the dictionary builder
 type IndexBuilder struct {
        Builder
-       Append func(int)
+       Append       func(int)
+       UnsafeAppend func(int)
 }
 
 func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) 
(ret IndexBuilder, err error) {
@@ -324,34 +325,58 @@ func createIndexBuilder(mem memory.Allocator, dt 
arrow.FixedWidthDataType) (ret
                ret.Append = func(idx int) {
                        ret.Builder.(*Int8Builder).Append(int8(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Int8Builder).UnsafeAppend(int8(idx))
+               }
        case arrow.UINT8:
                ret.Append = func(idx int) {
                        ret.Builder.(*Uint8Builder).Append(uint8(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Uint8Builder).UnsafeAppend(uint8(idx))
+               }
        case arrow.INT16:
                ret.Append = func(idx int) {
                        ret.Builder.(*Int16Builder).Append(int16(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Int16Builder).UnsafeAppend(int16(idx))
+               }
        case arrow.UINT16:
                ret.Append = func(idx int) {
                        ret.Builder.(*Uint16Builder).Append(uint16(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Uint16Builder).UnsafeAppend(uint16(idx))
+               }
        case arrow.INT32:
                ret.Append = func(idx int) {
                        ret.Builder.(*Int32Builder).Append(int32(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Int32Builder).UnsafeAppend(int32(idx))
+               }
        case arrow.UINT32:
                ret.Append = func(idx int) {
                        ret.Builder.(*Uint32Builder).Append(uint32(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Uint32Builder).UnsafeAppend(uint32(idx))
+               }
        case arrow.INT64:
                ret.Append = func(idx int) {
                        ret.Builder.(*Int64Builder).Append(int64(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Int64Builder).UnsafeAppend(int64(idx))
+               }
        case arrow.UINT64:
                ret.Append = func(idx int) {
                        ret.Builder.(*Uint64Builder).Append(uint64(idx))
                }
+               ret.UnsafeAppend = func(idx int) {
+                       ret.Builder.(*Uint64Builder).UnsafeAppend(uint64(idx))
+               }
        default:
                debug.Assert(false, "dictionary index type must be integral")
                err = fmt.Errorf("dictionary index type must be integral, not 
%s", dt)
@@ -646,6 +671,14 @@ func (b *dictionaryBuilder) AppendEmptyValues(n int) {
        }
 }
 
+func (b *dictionaryBuilder) UnsafeAppendBoolToBitmap(v bool) {
+       if !v {
+               b.nulls += 1
+       }
+       b.length += 1
+       b.idxBuilder.UnsafeAppendBoolToBitmap(v)
+}
+
 func (b *dictionaryBuilder) Reserve(n int) {
        b.idxBuilder.Reserve(n)
 }
@@ -781,6 +814,13 @@ func (b *dictionaryBuilder) insertDictBytes(val []byte) 
error {
        return err
 }
 
+func (b *dictionaryBuilder) unsafeAppendValue(val interface{}) error {
+       idx, _, err := b.memoTable.GetOrInsert(val)
+       b.idxBuilder.UnsafeAppend(idx)
+       b.length += 1
+       return err
+}
+
 func (b *dictionaryBuilder) appendValue(val interface{}) error {
        idx, _, err := b.memoTable.GetOrInsert(val)
        b.idxBuilder.Append(idx)
@@ -990,7 +1030,39 @@ type dictBuilder[T arrow.ValueType] struct {
        dictionaryBuilder
 }
 
+func (b *dictBuilder[T]) UnsafeAppend(v T) {
+       // SAFETY: it is safe to ignore the value returned by the calls to 
`unsafeAppendValue()`
+       // here since `UnsafeAppend()` is statically typed and the only case in 
which that method
+       // errors is when trying to insert an invalid `interface{}` into the 
`memoTable`.
+       var err error
+       switch val := any(v).(type) {
+       case arrow.Duration:
+               err = b.unsafeAppendValue(int64(val))
+       case arrow.Timestamp:
+               err = b.unsafeAppendValue(int64(val))
+       case arrow.Time32:
+               err = b.unsafeAppendValue(int32(val))
+       case arrow.Time64:
+               err = b.unsafeAppendValue(int64(val))
+       case arrow.Date32:
+               err = b.unsafeAppendValue(int32(val))
+       case arrow.Date64:
+               err = b.unsafeAppendValue(int64(val))
+       case arrow.MonthInterval:
+               err = b.unsafeAppendValue(int32(val))
+       default:
+               err = b.unsafeAppendValue(v)
+       }
+       debug.Assert(err == nil, "Trying to insert wrong type into memoTable 
even though this method is statically typed. This is an implementation bug.")
+}
+
 func (b *dictBuilder[T]) Append(v T) error {
+       // TODO: it is safe to ignore the value returned by the calls to 
`appendValue()`
+       // here since `Append()` is statically typed and the only case in which 
that
+       // method errors is when trying to insert an invalid `interface{}` into 
the `memoTable`.
+       //
+       // This would be a breaking change to the public API of `dictBuilder`, 
so it needs
+       // to happen over a major release.
        switch val := any(v).(type) {
        case arrow.Duration:
                return b.appendValue(int64(val))
@@ -1058,6 +1130,12 @@ func (b *BinaryDictionaryBuilder) Append(v []byte) error 
{
                return nil
        }
 
+       // TODO: it is safe to ignore the value returned by the call to 
`appendBytes()`
+       // here since `Append()` is statically typed and the only case in which 
that
+       // method errors is when trying to insert an invalid `interface{}` into 
the `memoTable`.
+       //
+       // This would be a breaking change to the public API of 
`BinaryDictionaryBuilder`,
+       // so it needs to happen over a major release.
        return b.appendBytes(v)
 }
 
@@ -1134,6 +1212,13 @@ type fixedSizeDictionaryBuilder[T fsbType] struct {
 }
 
 func (b *fixedSizeDictionaryBuilder[T]) Append(v T) error {
+       // TODO: it is safe to ignore the value returned by the calls to 
`appendValue()`
+       // and `appendBytes()` here since `Append()` is statically typed and 
the only
+       // case in which these method error is when trying to insert an invalid
+       // `interface{}` into the `memoTable`.
+       //
+       // This would be a breaking change to the public API of 
`fixedSizeDictionaryBuilder`,
+       // so it needs to happen over a major release.
        if v, ok := any(v).([]byte); ok {
                return b.appendBytes(v[:b.byteWidth])
        }
@@ -1164,6 +1249,12 @@ type FixedSizeBinaryDictionaryBuilder struct {
 }
 
 func (b *FixedSizeBinaryDictionaryBuilder) Append(v []byte) error {
+       // TODO: it is safe to ignore the value returned by the calls to 
`appendValue()`
+       // here since `Append()` is statically typed and the only case in which 
that
+       // method errors is when trying to insert an invalid `interface{}` into 
the `memoTable`.
+       //
+       // This would be a breaking change to the public API of 
`FixedSizeBinaryDictionaryBuilder`,
+       // so it needs to happen over a major release.
        return b.appendValue(v[:b.byteWidth])
 }
 
diff --git a/arrow/array/dictionary_test.go b/arrow/array/dictionary_test.go
index 9b9d3b1f..24aab674 100644
--- a/arrow/array/dictionary_test.go
+++ b/arrow/array/dictionary_test.go
@@ -145,6 +145,47 @@ func (p *PrimitiveDictionaryTestSuite) 
TestDictionaryBuilderInit() {
        p.True(array.Equal(expected, arr))
 }
 
+func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderReserveAndAppend() 
{
+       expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, 
ValueType: p.typ}
+       bldr := array.NewDictionaryBuilder(p.mem, expectedType)
+       defer bldr.Release()
+
+       builder := reflect.ValueOf(bldr)
+       appendFn := builder.MethodByName("UnsafeAppend")
+       validFn := builder.MethodByName("UnsafeAppendBoolToBitmap")
+
+       bldr.Reserve(7)
+       validFn.Call([]reflect.Value{reflect.ValueOf(true)})
+       validFn.Call([]reflect.Value{reflect.ValueOf(false)})
+       appendFn.Call([]reflect.Value{reflect.ValueOf(0).Convert(p.reftyp)})
+       appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
+       validFn.Call([]reflect.Value{reflect.ValueOf(false)})
+       appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
+       appendFn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})
+
+       p.EqualValues(7, bldr.Len())
+       p.EqualValues(2, bldr.NullN())
+
+       p.EqualValues(3, bldr.DictionarySize())
+
+       arr := bldr.NewArray().(*array.Dictionary)
+       defer arr.Release()
+
+       p.True(arrow.TypeEqual(expectedType, arr.DataType()))
+       expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, 
strings.NewReader("[0, 1, 2]"))
+       p.NoError(err)
+       defer expectedDict.Release()
+
+       expectedIndices, _, err := array.FromJSON(p.mem, 
expectedType.IndexType, strings.NewReader("[0, null, 0, 1, null, 1, 2]"))
+       p.NoError(err)
+       defer expectedIndices.Release()
+
+       expected := array.NewDictionaryArray(expectedType, expectedIndices, 
expectedDict)
+       defer expected.Release()
+
+       p.True(array.Equal(expected, arr))
+}
+
 func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() {
        valueType := p.typ
        dictArr, _, err := array.FromJSON(p.mem, valueType, 
strings.NewReader("[1, 2]"))
diff --git a/arrow/array/encoded.go b/arrow/array/encoded.go
index 08800d4b..85432a13 100644
--- a/arrow/array/encoded.go
+++ b/arrow/array/encoded.go
@@ -398,6 +398,10 @@ func (b *RunEndEncodedBuilder) AppendNulls(n int) {
        }
 }
 
+func (b *RunEndEncodedBuilder) UnsafeAppendBoolToBitmap(v bool) {
+       panic("Calling UnsafeAppendBoolToBitmap on a run-end encoded array is 
semantically undefined.")
+}
+
 func (b *RunEndEncodedBuilder) NullN() int {
        return UnknownNullCount
 }

Reply via email to