This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-go.git


The following commit(s) were added to refs/heads/main by this push:
     new 6b2897c9 fix(arrow/array): adding string boundary checking for 
(*String).Value (#844)
6b2897c9 is described below

commit 6b2897c9eb93a77c42b3cfae9af702df6c257b7f
Author: David Zhao <[email protected]>
AuthorDate: Mon Jun 8 09:25:28 2026 -0700

    fix(arrow/array): adding string boundary checking for (*String).Value (#844)
    
    ### Rationale for this change
    Fixes #843, a silent data leak caused by missing boundary checking in
    (*String).Value.
    
    ### What changes are included in this PR?
    This PR adds boundary checking in (*String).Value and
    (*LargeString).Value and corresponding regression tests in
    arrow/array/string_test.go. Note that regression tests mirrors the one
    found in arrow/array/binary_test.go see
    
[TestBinarySliceOutOfBounds](https://github.com/apache/arrow-go/blob/bbc81ce1d23127f838389fc39a1d619bdcc1cbf9/arrow/array/binary_test.go#L292).
    
    
    ### Are these changes tested?
    Yes - see new added regression test
    
    
    ### Are there any user-facing changes?
    No - adds internal boundary checking.
    
    ---------
    
    Signed-off-by: happydave1 <[email protected]>
---
 arrow/array/string.go      |   6 ++
 arrow/array/string_test.go | 138 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)

diff --git a/arrow/array/string.go b/arrow/array/string.go
index 35b412f2..00748576 100644
--- a/arrow/array/string.go
+++ b/arrow/array/string.go
@@ -57,6 +57,9 @@ func (a *String) Reset(data arrow.ArrayData) {
 
 // Value returns the slice at index i. This value should not be mutated.
 func (a *String) Value(i int) string {
+       if i < 0 || i >= a.data.length {
+               panic("arrow/array: index out of range")
+       }
        i = i + a.data.offset
        return a.values[a.offsets[i]:a.offsets[i+1]]
 }
@@ -262,6 +265,9 @@ func (a *LargeString) Reset(data arrow.ArrayData) {
 
 // Value returns the slice at index i. This value should not be mutated.
 func (a *LargeString) Value(i int) string {
+       if i < 0 || i >= a.data.length {
+               panic("arrow/array: index out of range")
+       }
        i = i + a.data.offset
        return a.values[a.offsets[i]:a.offsets[i+1]]
 }
diff --git a/arrow/array/string_test.go b/arrow/array/string_test.go
index 80c51f1a..0457b8b3 100644
--- a/arrow/array/string_test.go
+++ b/arrow/array/string_test.go
@@ -145,6 +145,144 @@ func TestStringArray(t *testing.T) {
        }
 }
 
+func TestStringSliceOutOfBounds(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", 
"rs", "tu"}
+
+       b := array.NewStringBuilder(mem)
+       defer b.Release()
+
+       for _, v := range values {
+               b.AppendString(v)
+       }
+
+       arr := b.NewArray().(*array.String)
+       defer arr.Release()
+
+       slice := array.NewSlice(arr, 3, 8).(*array.String)
+       defer slice.Release()
+
+       tests := []struct {
+               index int
+               panic bool
+       }{
+               {
+                       index: -1,
+                       panic: true,
+               },
+               {
+                       index: 5,
+                       panic: true,
+               },
+               {
+                       index: 0,
+                       panic: false,
+               },
+               {
+                       index: 4,
+                       panic: false,
+               },
+       }
+
+       for _, tc := range tests {
+               t.Run("", func(t *testing.T) {
+
+                       var val string
+
+                       if tc.panic {
+                               defer func() {
+                                       e := recover()
+                                       if e == nil {
+                                               t.Fatalf("this should have 
panicked, but did not; slice value %q", val)
+                                       }
+                                       if got, want := e.(string), 
"arrow/array: index out of range"; got != want {
+                                               t.Fatalf("invalid error. 
got=%q, want=%q", got, want)
+                                       }
+                               }()
+                       } else {
+                               defer func() {
+                                       if e := recover(); e != nil {
+                                               t.Fatalf("unexpected panic: 
%v", e)
+                                       }
+                               }()
+                       }
+
+                       val = slice.Value(tc.index)
+               })
+       }
+}
+
+func TestLargeStringSliceOutOfBounds(t *testing.T) {
+       mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       defer mem.AssertSize(t, 0)
+
+       values := []string{"a", "bc", "def", "g", "hijk", "lm", "n", "opq", 
"rs", "tu"}
+
+       b := array.NewLargeStringBuilder(mem)
+       defer b.Release()
+
+       for _, v := range values {
+               b.Append(v)
+       }
+
+       arr := b.NewArray().(*array.LargeString)
+       defer arr.Release()
+
+       slice := array.NewSlice(arr, 3, 8).(*array.LargeString)
+       defer slice.Release()
+
+       tests := []struct {
+               index int
+               panic bool
+       }{
+               {
+                       index: -1,
+                       panic: true,
+               },
+               {
+                       index: 5,
+                       panic: true,
+               },
+               {
+                       index: 0,
+                       panic: false,
+               },
+               {
+                       index: 4,
+                       panic: false,
+               },
+       }
+
+       for _, tc := range tests {
+               t.Run("", func(t *testing.T) {
+
+                       var val string
+
+                       if tc.panic {
+                               defer func() {
+                                       e := recover()
+                                       if e == nil {
+                                               t.Fatalf("this should have 
panicked, but did not; slice value %q", val)
+                                       }
+                                       if got, want := e.(string), 
"arrow/array: index out of range"; got != want {
+                                               t.Fatalf("invalid error. 
got=%q, want=%q", got, want)
+                                       }
+                               }()
+                       } else {
+                               defer func() {
+                                       if e := recover(); e != nil {
+                                               t.Fatalf("unexpected panic: 
%v", e)
+                                       }
+                               }()
+                       }
+
+                       val = slice.Value(tc.index)
+               })
+       }
+}
+
 func TestStringBuilder_Empty(t *testing.T) {
        mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
        defer mem.AssertSize(t, 0)

Reply via email to