[jira] [Created] (ARROW-18438) [Go] firstTimeBitmapWriter.Finish() panics with 8n structs
Min-Young Wu created ARROW-18438: Summary: [Go] firstTimeBitmapWriter.Finish() panics with 8n structs Key: ARROW-18438 URL: https://issues.apache.org/jira/browse/ARROW-18438 Project: Apache Arrow Issue Type: Bug Components: Go, Parquet Affects Versions: 10.0.1 Reporter: Min-Young Wu Even after [ARROW-17169|https://issues.apache.org/jira/browse/ARROW-17169] I still get a panic at the same location. Below is a test case that panics: {code:go} func (ps *ParquetIOTestSuite) TestStructWithNullableListOfStructs() { bldr := array.NewStructBuilder(memory.DefaultAllocator, arrow.StructOf( arrow.Field{ Name: "l", Type: arrow.ListOf(arrow.StructOf( arrow.Field{Name: "a", Type: arrow.BinaryTypes.String}, )), }, )) defer bldr.Release() lBldr := bldr.FieldBuilder(0).(*array.ListBuilder) stBldr := lBldr.ValueBuilder().(*array.StructBuilder) aBldr := stBldr.FieldBuilder(0).(*array.StringBuilder) bldr.AppendNull() bldr.Append(true) lBldr.Append(true) for i := 0; i < 8; i++ { stBldr.Append(true) aBldr.Append(strconv.Itoa(i)) } arr := bldr.NewArray() defer arr.Release() field := arrow.Field{Name: "x", Type: arr.DataType(), Nullable: true} expected := array.NewTable(arrow.NewSchema([]arrow.Field{field}, nil), []arrow.Column{*arrow.NewColumn(field, arrow.NewChunked(field.Type, []arrow.Array{arr}))}, -1) defer expected.Release() ps.roundTripTable(expected, false) } {code} I've tried to trim down the input data and this is as minimal as I could get it. And yes: * wrapping struct with initial null is required * the inner list needs to contain 8 structs (or any multiple of 8) -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Created] (ARROW-16473) [Go] Memory leak in parquet page reading
Min-Young Wu created ARROW-16473: Summary: [Go] Memory leak in parquet page reading Key: ARROW-16473 URL: https://issues.apache.org/jira/browse/ARROW-16473 Project: Apache Arrow Issue Type: Bug Components: Go, Parquet Reporter: Min-Young Wu Assignee: Min-Young Wu {code:go} package main_test import ( "context" "os" "testing" "github.com/apache/arrow/go/v8/arrow/memory" "github.com/apache/arrow/go/v8/parquet" "github.com/apache/arrow/go/v8/parquet/file" "github.com/apache/arrow/go/v8/parquet/pqarrow" ) func TestParquetReading(t *testing.T) { ctx := context.Background() mem := memory.NewCheckedAllocator(memory.DefaultAllocator) defer mem.AssertSize(t, 0) f, err := os.Open("test.parquet") if err != nil { t.Fatal(err) } defer f.Close() pf, err := file.NewParquetReader( f, // Note: use the provided memory allocator file.WithReadProps(parquet.NewReaderProperties(mem)), ) if err != nil { t.Fatal(err) } defer pf.Close() r, err := pqarrow.NewFileReader(pf, pqarrow.ArrowReadProperties{}, mem) if err != nil { t.Fatal(err) } table, err := r.ReadTable(ctx) if err != nil { t.Fatal(err) } defer table.Release() } {code} -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Created] (ARROW-15946) [Go] Memory leak in pqarrow.NewColumnWriter with nested structures
Min-Young Wu created ARROW-15946: Summary: [Go] Memory leak in pqarrow.NewColumnWriter with nested structures Key: ARROW-15946 URL: https://issues.apache.org/jira/browse/ARROW-15946 Project: Apache Arrow Issue Type: Bug Components: Go, Parquet Reporter: Min-Young Wu There seems to be a memory leak (well, using the default allocator, it would just be an accounting error?) when writing nested structures using pqarrow.FileWriter Repro: {code:go} package main import ( "bytes" "fmt" "github.com/apache/arrow/go/v7/arrow" "github.com/apache/arrow/go/v7/arrow/array" "github.com/apache/arrow/go/v7/arrow/memory" "github.com/apache/arrow/go/v7/parquet" "github.com/apache/arrow/go/v7/parquet/compress" "github.com/apache/arrow/go/v7/parquet/pqarrow" ) func main() { allocator := memory.NewCheckedAllocator(memory.DefaultAllocator) sc := arrow.NewSchema([]arrow.Field{ {Name: "f32", Type: arrow.PrimitiveTypes.Float32, Nullable: true}, {Name: "i32", Type: arrow.PrimitiveTypes.Int32, Nullable: true}, {Name: "struct_i64_f64", Type: arrow.StructOf( arrow.Field{Name: "i64", Type: arrow.PrimitiveTypes.Int64, Nullable: true}, arrow.Field{Name: "f64", Type: arrow.PrimitiveTypes.Float64, Nullable: true})}, }, nil) bld := array.NewRecordBuilder(allocator, sc) bld.Field(0).(*array.Float32Builder).Append(1.0) bld.Field(1).(*array.Int32Builder).Append(1) sbld := bld.Field(2).(*array.StructBuilder) sbld.Append(true) sbld.FieldBuilder(0).(*array.Int64Builder).Append(1) sbld.FieldBuilder(1).(*array.Float64Builder).Append(1.0) rec := bld.NewRecord() bld.Release() var buf bytes.Buffer wr, err := pqarrow.NewFileWriter(sc, &buf, parquet.NewWriterProperties(parquet.WithCompression(compress.Codecs.Snappy)), pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(allocator))) if err != nil { panic(err) } err = wr.Write(rec) if err != nil { panic(err) } rec.Release() wr.Close() if allocator.CurrentAlloc() != 0 { fmt.Printf("remaining allocation size: %d\n", allocator.CurrentAlloc()) } } {code} -- This message 
was sent by Atlassian Jira (v8.20.1#820001)