tushartg commented on issue #506:
URL: https://github.com/apache/arrow-go/issues/506#issuecomment-3320770500

   code:
   ```go
   // getSmallArrowRecordFromScratch builds a one-row record with two string
   // columns ("hello", "world") and one int64 column (123), all declared
   // non-nullable, using mem for the record builder. The builder is released
   // before returning; the record itself is handed to the caller.
   func getSmallArrowRecordFromScratch(mem memory.Allocator) arrow.Record {
        fields := []arrow.Field{
                {Name: "col0", Type: arrow.BinaryTypes.String, Nullable: false},
                {Name: "col1", Type: arrow.BinaryTypes.String, Nullable: false},
                {Name: "col2", Type: arrow.PrimitiveTypes.Int64, Nullable: false},
        }

        rb := array.NewRecordBuilder(mem, arrow.NewSchema(fields, nil))
        defer rb.Release()

        // Append exactly one value per column so the record has a single row.
        rb.Field(0).(*array.StringBuilder).Append("hello")
        rb.Field(1).(*array.StringBuilder).Append("world")
        rb.Field(2).(*array.Int64Builder).Append(123)

        return rb.NewRecord()
   }
   
   func TestArrowParquetMemoryLeak(t *testing.T) {
            // custom allocator
        mem := memory.NewCheckedAllocator(memory.DefaultAllocator)
        memory.DefaultAllocator = mem
        defer mem.AssertSize(t, 0)
   
        filePath := filepath.Join(os.TempDir(), "parquet-bench-*.parquet")
        file, err := os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 
0o644)
        if err != nil {
                t.Fatalf("Error opening file: %v", err)
        }
   
        // Use basic properties with minimal configuration for better 
Databricks compatibility
        // Avoid WithStoreSchema() which can cause "Unexpected trailing bytes" 
errors in Databricks
        // Use GZIP compression which is more widely supported than Snappy
        props := parquet.NewWriterProperties(parquet.WithAllocator(mem))
        arrowProps := 
pqarrow.NewArrowWriterProperties(pqarrow.WithAllocator(mem))
   
        record := getSmallArrowRecordFromScratch(mem)
        defer (*record).Release()
   
        writer, err := pqarrow.NewFileWriter((*record).Schema(), file, props, 
arrowProps)
        if err != nil {
                log.Logger().Errorf("Error creating parquet writer for file %s: 
%v", filePath, err)
                file.Close()
                t.Fatalf("Error creating parquet writer: %v", err)
        }
        writer.WriteBuffered(*record)
        writer.Close()
        file.Close()
        os.Remove(filePath)
   }
   ```
   
   If you run the above code, it reports the following memory leaks:
   
   ```shell
   === RUN   TestArrowParquetMemoryLeak
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.(*PooledBufferWriter).Reset+b3
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/types.go:212
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+130
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:164
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 256 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/file.newColumnWriterBase+b0
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/file/column_writer.go:165
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/checked_allocator.go:186: LEAK of 2048 bytes FROM
                   
github.com/apache/arrow-go/v18/parquet/internal/encoding.newEncoderBase+7c
                           
/home/repo/vendor/github.com/apache/arrow-go/v18/parquet/internal/encoding/encoder.go:79
       /home/repo/pkg/util/parquet_helper_test.go:65: invalid memory size 
exp=0, got=20224
   --- FAIL: TestArrowParquetMemoryLeak (0.00s)
   FAIL
   FAIL    github.com/SimpleDataLabsInc/prophecy-orchestrate/pkg/util      
0.436s
   ```
   
   If you comment out this line:
   ```go
   memory.DefaultAllocator = mem
   ```
   then it shows no memory leak. So, I believe the library is using the
default allocator somewhere.
   
   @zeroshade - am I doing something wrong?


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to