This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 6a332c9b fix(parquet/variant): correct binary search bounds in
ObjectValue.ValueByKey (#841)
6a332c9b is described below
commit 6a332c9b34b60b41ea7343a3cd4309c4bd80c5a8
Author: Jared Yu (余启正) <[email protected]>
AuthorDate: Mon Jun 8 10:22:26 2026 -0700
fix(parquet/variant): correct binary search bounds in
ObjectValue.ValueByKey (#841)
### Rationale for this change
Closes #842
The `ObjectValue.ValueByKey` function implements a custom binary search
on the `v.value` array of field IDs to look up fields by key. It
initializes its boundaries using an exclusive upper bound (`i, j := 0,
n`).
However, when the target key is smaller than the key at the `mid` index,
it updates the upper bound with:
`j = mid - 1`
Since `j` is exclusive, setting it to `mid - 1` excludes the index `mid
- 1` from the search space entirely. If the target key is located at
`mid - 1`, the lookup will fail and return `arrow.ErrNotFound`. This
results in missing fields or lookups failing on objects containing >= 32
fields.
### What changes are included in this PR?
- Correct the exclusive upper bound update in `ObjectValue.ValueByKey`
from `j = mid - 1` to `j = mid`.
### Are these changes tested?
- Added `TestObjectBinarySearch` in `parquet/variant/variant_test.go`
which constructs a 32-field object and verifies that all 32 lookups
using `ValueByKey` succeed.
### Are there any user-facing changes?
- No API changes. This is a correctness bug fix.
---
parquet/variant/variant.go | 2 +-
parquet/variant/variant_test.go | 25 +++++++++++++++++++++++++
2 files changed, 26 insertions(+), 1 deletion(-)
diff --git a/parquet/variant/variant.go b/parquet/variant/variant.go
index 00925564..1fe44cc5 100644
--- a/parquet/variant/variant.go
+++ b/parquet/variant/variant.go
@@ -405,7 +405,7 @@ func (v ObjectValue) ValueByKey(key string) (ObjectField,
error) {
Key: key,
Value: Value{value:
v.value[v.dataStart+offset:], meta: v.meta}}, nil
case 1:
- j = mid - 1
+ j = mid
}
}
diff --git a/parquet/variant/variant_test.go b/parquet/variant/variant_test.go
index 0a2d3154..5b76fd3a 100644
--- a/parquet/variant/variant_test.go
+++ b/parquet/variant/variant_test.go
@@ -18,6 +18,7 @@ package variant_test
import (
"encoding/json"
+ "fmt"
"math"
"os"
"path/filepath"
@@ -808,3 +809,27 @@ func TestVariantJSONRoundTrip(t *testing.T) {
}
assert.JSONEq(t, string(jsonBytes), variantFromJSON.String())
}
+
+func TestObjectBinarySearch(t *testing.T) {
+ var b variant.Builder
+ start := b.Offset()
+ fields := make([]variant.FieldEntry, 32)
+ for i := 0; i < 32; i++ {
+ key := fmt.Sprintf("key%02d", i)
+ fields[i] = b.NextField(start, key)
+ require.NoError(t, b.AppendInt(int64(i)))
+ }
+ require.NoError(t, b.FinishObject(start, fields))
+ v, err := b.Build()
+ require.NoError(t, err)
+
+ obj := v.Value().(variant.ObjectValue)
+ assert.EqualValues(t, 32, obj.NumElements())
+
+ for i := 0; i < 32; i++ {
+ key := fmt.Sprintf("key%02d", i)
+ field, err := obj.ValueByKey(key)
+ require.NoError(t, err, "failed to find key: %s", key)
+ assert.Equal(t, int8(i), field.Value.Value())
+ }
+}