This is an automated email from the ASF dual-hosted git repository.

zeroshade pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2fe17338e2 GH-34853: [Go] Add TotalRecordSize, TotalArraySize (#34854)
2fe17338e2 is described below

commit 2fe17338e2d1f85d0c2685d31d2dd51f138b6b80
Author: Yevgeny Pats <[email protected]>
AuthorDate: Mon Apr 10 14:50:55 2023 -0400

    GH-34853: [Go] Add TotalRecordSize, TotalArraySize (#34854)
    
    Closes https://github.com/apache/arrow/issues/34853
    * Closes: #34853
    
    Authored-by: Yevgeny Pats <[email protected]>
    Signed-off-by: Matt Topol <[email protected]>
---
 go/arrow/util/byte_size.go      |  79 +++++++++++++++++++++++++++++
 go/arrow/util/byte_size_test.go | 110 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 189 insertions(+)

diff --git a/go/arrow/util/byte_size.go b/go/arrow/util/byte_size.go
new file mode 100644
index 0000000000..db08e8d27b
--- /dev/null
+++ b/go/arrow/util/byte_size.go
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import (
+       "github.com/apache/arrow/go/v12/arrow"
+       "github.com/apache/arrow/go/v12/arrow/array"
+       "github.com/apache/arrow/go/v12/arrow/memory"
+)
+
+func isArrayDataNil(arrayData arrow.ArrayData) bool {
+       if arrayData == nil {
+               return true
+       }
+       if v, ok := arrayData.(*array.Data); ok {
+               return v == nil
+       }
+       panic("unknown ArrayData type")
+}
+
+// totalArrayDataSize adds up Len() for every non-nil buffer reachable from
+// arrayData: its own buffers first, then (recursively) those of its children,
+// then those of its dictionary when isArrayDataNil says one is present.
+// A buffer already recorded in seenBuffers is not counted again; every newly
+// counted buffer is recorded in seenBuffers.
+func totalArrayDataSize(arrayData arrow.ArrayData, seenBuffers map[*memory.Buffer]struct{}) int64 {
+       var total int64
+       for _, b := range arrayData.Buffers() {
+               if b == nil {
+                       continue
+               }
+               if _, counted := seenBuffers[b]; counted {
+                       continue
+               }
+               total += int64(b.Len())
+               seenBuffers[b] = struct{}{}
+       }
+       for _, c := range arrayData.Children() {
+               total += totalArrayDataSize(c, seenBuffers)
+       }
+       if d := arrayData.Dictionary(); !isArrayDataNil(d) {
+               total += totalArrayDataSize(d, seenBuffers)
+       }
+       return total
+}
+
+// totalArraySize hands arr.Data() to totalArrayDataSize, sharing seenBuffers
+// so buffers counted earlier are not counted again.
+func totalArraySize(arr arrow.Array, seenBuffers map[*memory.Buffer]struct{}) int64 {
+       data := arr.Data()
+       return totalArrayDataSize(data, seenBuffers)
+}
+
+func totalRecordSize(record arrow.Record, seenBuffers 
map[*memory.Buffer]struct{}) int64 {
+       var sum int64
+       for _, c := range record.Columns() {
+               sum += totalArraySize(c, seenBuffers)
+       }
+       return sum
+}
+
+// TotalArraySize returns the combined length in bytes of the buffers
+// referenced by arr. Counting starts from an empty seen-set, and a buffer
+// that is reached more than once is counted a single time.
+func TotalArraySize(arr arrow.Array) int64 {
+       return totalArraySize(arr, map[*memory.Buffer]struct{}{})
+}
+
+// TotalRecordSize returns the combined length in bytes of the buffers
+// referenced by the columns of record. Counting starts from an empty
+// seen-set, and a buffer shared between columns is counted a single time.
+func TotalRecordSize(record arrow.Record) int64 {
+       return totalRecordSize(record, map[*memory.Buffer]struct{}{})
+}
diff --git a/go/arrow/util/byte_size_test.go b/go/arrow/util/byte_size_test.go
new file mode 100644
index 0000000000..794aaf1953
--- /dev/null
+++ b/go/arrow/util/byte_size_test.go
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util_test
+
+import (
+       "strings"
+       "testing"
+
+       "github.com/apache/arrow/go/v12/arrow"
+       "github.com/apache/arrow/go/v12/arrow/array"
+       "github.com/apache/arrow/go/v12/arrow/memory"
+       "github.com/apache/arrow/go/v12/arrow/util"
+       "github.com/stretchr/testify/assert"
+)
+
+// TestTotalArrayReusedBuffers checks that a record listing the same boolean
+// array in two columns reports the same TotalRecordSize (5) as a record that
+// lists the array only once.
+func TestTotalArrayReusedBuffers(t *testing.T) {
+       alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       // Deferred first so it runs last, after every Release below.
+       defer alloc.AssertSize(t, 0)
+
+       b := array.NewBooleanBuilder(alloc)
+       defer b.Release()
+       b.Append(true)
+       col := b.NewArray()
+       defer col.Release()
+
+       // Two columns backed by the very same array.
+       twoColSchema := arrow.NewSchema([]arrow.Field{
+               {Name: "a", Type: arrow.FixedWidthTypes.Boolean},
+               {Name: "b", Type: arrow.FixedWidthTypes.Boolean},
+       }, nil)
+       twoCols := array.NewRecord(twoColSchema, []arrow.Array{col, col}, 1)
+       defer twoCols.Release()
+
+       assert.Equal(t, int64(5), util.TotalRecordSize(twoCols))
+
+       // A single column backed by that array.
+       oneColSchema := arrow.NewSchema([]arrow.Field{
+               {Name: "a", Type: arrow.FixedWidthTypes.Boolean},
+       }, nil)
+       oneCol := array.NewRecord(oneColSchema, []arrow.Array{col}, 1)
+       defer oneCol.Release()
+
+       // Same expected size as above: the two-column record reuses the buffer.
+       assert.Equal(t, int64(5), util.TotalRecordSize(oneCol))
+}
+
+// TestTotalArraySizeBasic checks TotalArraySize on three flat arrays: an
+// Int16 array parsed from "[1, 2, 3]" (expects 10), an Int16 array with one
+// null among nine entries (expects 22), and the array produced by a boolean
+// builder that had nothing appended (expects 0).
+func TestTotalArraySizeBasic(t *testing.T) {
+       alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       // Deferred first so it runs last, after every Release below.
+       defer alloc.AssertSize(t, 0)
+
+       int16Type := arrow.PrimitiveTypes.Int16
+
+       dense, _, err := array.FromJSON(alloc, int16Type, strings.NewReader("[1, 2, 3]"))
+       assert.NoError(t, err)
+       defer dense.Release()
+       assert.Equal(t, int64(10), util.TotalArraySize(dense))
+
+       sparse, _, err := array.FromJSON(alloc, int16Type, strings.NewReader("[1, 2, 3, 4, null, 6, 7, 8, 9]"))
+       assert.NoError(t, err)
+       defer sparse.Release()
+       assert.Equal(t, int64(22), util.TotalArraySize(sparse))
+
+       // Nothing is appended to this builder before NewArray is called.
+       b := array.NewBooleanBuilder(alloc)
+       defer b.Release()
+
+       empty := b.NewArray()
+       defer empty.Release()
+
+       assert.Equal(t, int64(0), util.TotalArraySize(empty))
+}
+
+// TestTotalArraySizeNested checks TotalArraySize on a list-of-Int64 array
+// parsed from "[[0, 1, 2, 3, 4], [5], null]" and expects 72.
+func TestTotalArraySizeNested(t *testing.T) {
+       alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       // Deferred first so it runs last, after the Release below.
+       defer alloc.AssertSize(t, 0)
+
+       listType := arrow.ListOf(arrow.PrimitiveTypes.Int64)
+       input := strings.NewReader("[[0, 1, 2, 3, 4], [5], null]")
+
+       lists, _, err := array.FromJSON(alloc, listType, input)
+       assert.NoError(t, err)
+       defer lists.Release()
+       assert.Equal(t, int64(72), util.TotalArraySize(lists))
+}
+
+// TestTotalArraySizeRecord builds a record with an Int32 column "a" and an
+// Int64 column "b", three values each, and expects TotalRecordSize to be 44.
+func TestTotalArraySizeRecord(t *testing.T) {
+       alloc := memory.NewCheckedAllocator(memory.NewGoAllocator())
+       // Deferred first so it runs last, after every Release below.
+       defer alloc.AssertSize(t, 0)
+
+       schema := arrow.NewSchema([]arrow.Field{
+               {Name: "a", Type: arrow.PrimitiveTypes.Int32},
+               {Name: "b", Type: arrow.PrimitiveTypes.Int64},
+       }, nil)
+       rb := array.NewRecordBuilder(alloc, schema)
+       defer rb.Release()
+
+       colA := rb.Field(0).(*array.Int32Builder)
+       colA.AppendValues([]int32{1, 2, 3}, nil)
+       colB := rb.Field(1).(*array.Int64Builder)
+       colB.AppendValues([]int64{4, 5, 6}, nil)
+
+       rec := rb.NewRecord()
+       defer rec.Release()
+
+       assert.Equal(t, int64(44), util.TotalRecordSize(rec))
+}

Reply via email to