This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git
The following commit(s) were added to refs/heads/main by this push:
new 6b2897c9 fix(arrow/array): adding string boundary checking for
(*String).Value (#844)
6b2897c9 is described below
commit 6b2897c9eb93a77c42b3cfae9af702df6c257b7f
Author: David Zhao <[email protected]>
AuthorDate: Mon Jun 8 09:25:28 2026 -0700
fix(arrow/array): adding string boundary checking for (*String).Value (#844)
### Rationale for this change
Fixes #843, a silent data leak caused by missing boundary checking in
(*String).Value.
### What changes are included in this PR?
This PR adds boundary checking in (*String).Value and
(*LargeString).Value and corresponding regression tests in
arrow/array/string_test.go. Note that regression tests mirrors the one
found in arrow/array/binary_test.go see
[TestBinarySliceOutOfBounds](https://github.com/apache/arrow-go/blob/bbc81ce1d23127f838389fc39a1d619bdcc1cbf9/arrow/array/binary_test.go#L292).
### Are these changes tested?
Yes - see new added regression test
### Are there any user-facing changes?
No - adds internal boundary checking.
---------
Signed-off-by: happydave1 <[email protected]>
---
arrow/array/string.go | 6 ++
arrow/array/string_test.go | 138 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 144 insertions(+)
diff --git a/arrow/array/string.go b/arrow/array/string.go
index 35b412f2..00748576 100644
--- a/arrow/array/string.go
+++ b/arrow/array/string.go
@@ -57,6 +57,9 @@ func (a *String) Reset(data arrow.ArrayData) {
// Value returns the slice at index i. This value should not be mutated.
func (a *String) Value(i int) string {
+ if i < 0 || i >= a.data.length {
+ panic("arrow/array: index out of range")
+ }
i = i + a.data.offset
return a.values[a.offsets[i]:a.offsets[i+1]]
}
@@ -262,6 +265,9 @@ func (a *LargeString) Reset(data arrow.ArrayData) {
// Value returns the slice at index i. This value should not be mutated.
func (a *LargeString) Value(i int) string {
+ if i < 0 || i >= a.data.length {
+ panic("arrow/array: index out of range")
+ }
i = i + a.data.offset
return a.values[a.offsets[i]:a.offsets[i+1]]
}
diff --git a/arrow/array/string_test.go b/arrow/array/string_test.go
index 80c51f1a..0457b8b3 100644
--- a/arrow/array/string_test.go
+++ b/arrow/array/string_test.go
@@ -145,6 +145,144 @@ func TestStringArray(t *testing.T) {
}
}
+func TestStringSliceOutOfBounds(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq",
"rs", "tu"}
+
+ b := array.NewStringBuilder(mem)
+ defer b.Release()
+
+ for _, v := range values {
+ b.AppendString(v)
+ }
+
+ arr := b.NewArray().(*array.String)
+ defer arr.Release()
+
+ slice := array.NewSlice(arr, 3, 8).(*array.String)
+ defer slice.Release()
+
+ tests := []struct {
+ index int
+ panic bool
+ }{
+ {
+ index: -1,
+ panic: true,
+ },
+ {
+ index: 5,
+ panic: true,
+ },
+ {
+ index: 0,
+ panic: false,
+ },
+ {
+ index: 4,
+ panic: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+
+ var val string
+
+ if tc.panic {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("this should have
panicked, but did not; slice value %q", val)
+ }
+ if got, want := e.(string),
"arrow/array: index out of range"; got != want {
+ t.Fatalf("invalid error.
got=%q, want=%q", got, want)
+ }
+ }()
+ } else {
+ defer func() {
+ if e := recover(); e != nil {
+ t.Fatalf("unexpected panic:
%v", e)
+ }
+ }()
+ }
+
+ val = slice.Value(tc.index)
+ })
+ }
+}
+
+func TestLargeStringSliceOutOfBounds(t *testing.T) {
+ mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+ defer mem.AssertSize(t, 0)
+
+ values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq",
"rs", "tu"}
+
+ b := array.NewLargeStringBuilder(mem)
+ defer b.Release()
+
+ for _, v := range values {
+ b.Append(v)
+ }
+
+ arr := b.NewArray().(*array.LargeString)
+ defer arr.Release()
+
+ slice := array.NewSlice(arr, 3, 8).(*array.LargeString)
+ defer slice.Release()
+
+ tests := []struct {
+ index int
+ panic bool
+ }{
+ {
+ index: -1,
+ panic: true,
+ },
+ {
+ index: 5,
+ panic: true,
+ },
+ {
+ index: 0,
+ panic: false,
+ },
+ {
+ index: 4,
+ panic: false,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+
+ var val string
+
+ if tc.panic {
+ defer func() {
+ e := recover()
+ if e == nil {
+ t.Fatalf("this should have
panicked, but did not; slice value %q", val)
+ }
+ if got, want := e.(string),
"arrow/array: index out of range"; got != want {
+ t.Fatalf("invalid error.
got=%q, want=%q", got, want)
+ }
+ }()
+ } else {
+ defer func() {
+ if e := recover(); e != nil {
+ t.Fatalf("unexpected panic:
%v", e)
+ }
+ }()
+ }
+
+ val = slice.Value(tc.index)
+ })
+ }
+}
+
func TestStringBuilder_Empty(t *testing.T) {
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
defer mem.AssertSize(t, 0)