tushartg commented on issue #506:
URL: https://github.com/apache/arrow-go/issues/506#issuecomment-3320770500
code:
```go
// getSmallArrowRecordFromScratch builds a single-row record with two
// non-nullable string columns ("col0", "col1") and one non-nullable int64
// column ("col2"), using mem for the record builder. The builder is released
// before returning; the returned record is left for the caller to release.
func getSmallArrowRecordFromScratch(mem memory.Allocator) arrow.Record {
	fields := []arrow.Field{
		{Name: "col0", Type: arrow.BinaryTypes.String, Nullable: false},
		{Name: "col1", Type: arrow.BinaryTypes.String, Nullable: false},
		{Name: "col2", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
	}

	rb := array.NewRecordBuilder(mem, arrow.NewSchema(fields, nil))
	defer rb.Release()

	// Exactly one value per column, so the record has one row.
	rb.Field(0).(*array.StringBuilder).Append("hello")
	rb.Field(1).(*array.StringBuilder).Append("world")
	rb.Field(2).(*array.Int64Builder).Append(123)

	return rb.NewRecord()
}
func TestArrowParquetMemoryLeak(t *testing.T) {
// custom allocator
mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
memory.DefaultAllocator = mem
defer mem.AssertSize(t, 0)
filePath := filepath.Join(os.TempDir(), "parquet-bench-*.parquet")
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC,
0o644)
if err != nil {
t.Fatalf("Error opening file: %v", err)
}
// Use basic properties with minimal configuration for better
Databricks compatibility
// Avoid WithStoreSchema() which can cause "Unexpected trailing bytes"
errors in Databricks
// Use GZIP compression which is more widely supported than Snappy
props := parquet.NewWriterProperties(parquet.WithAllocator(mem))
arrowProps :=
pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))
record := getSmallArrowRecordFromScratch(mem)
defer (*record).Release()
writer, err := pqarrow.NewFileWriter((*record).Schema(), file, props,
arrowProps)
if err != nil {
log.Logger().Errorf("Error creating parquet writer for file %s:
%v", filePath, err)
file.Close()
t.Fatalf("Error creating parquet writer: %v", err)
}
writer.WriteBuffered(*record)
writer.Close()
file.Close()
os.Remove(filePath)
}
```
If you run the above code, it reports the following leaks:
```shell
=== RUN TestArrowParquetMemoryLeak
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.(*PooledBufferWriter).Reset+b3
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/types.go:212
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
/home/repo/pkg/util/parquet_helper_test.go:65: invalid memory size
exp=0, got=20224
--- FAIL: TestArrowParquetMemoryLeak (0.00s)
FAIL
FAIL github.com/SimpleDataLabsInc/prophecy-orchestrate/pkg/util
0.436s
```
If you comment out this line:
```go
memory.DefaultAllocator = mem
```
then it shows no memory leak. So I believe the library is using the
default allocator somewhere.
@zeroshade - am I doing something wrong?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]