joechenrh commented on code in PR #485:
URL: https://github.com/apache/arrow-go/pull/485#discussion_r2310638476


##########
parquet/file/column_reader.go:
##########
@@ -437,16 +446,27 @@ func (c *columnChunkReader) initDataDecoder(page Page, 
lvlByteLen int64) error {
                        format.Encoding_DELTA_LENGTH_BYTE_ARRAY,
                        format.Encoding_DELTA_BINARY_PACKED,
                        format.Encoding_BYTE_STREAM_SPLIT:
-                       c.curDecoder = 
c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem)
-                       c.decoders[encoding] = c.curDecoder
+                       c.curDecoder = 
c.decoderTraits.Decoder(parquet.Encoding(enc), c.descr, false, c.mem)
+                       c.decoders[enc] = c.curDecoder
                case format.Encoding_RLE_DICTIONARY:
                        return errors.New("parquet: dictionary page must be 
before data page")
                default:
-                       return fmt.Errorf("parquet: unknown encoding type %s", 
encoding)
+                       return fmt.Errorf("parquet: unknown encoding type %s", 
enc)
+               }
+       }
+
+       switch c.descr.PhysicalType() {
+       case parquet.Types.FixedLenByteArray:
+               c.curDecoder = &encoding.FixedLenByteArrayDecoderWrapper{
+                       FixedLenByteArrayDecoder: 
c.curDecoder.(encoding.FixedLenByteArrayDecoder),
+               }
+       case parquet.Types.ByteArray:
+               c.curDecoder = &encoding.ByteArrayDecoderWrapper{
+                       ByteArrayDecoder: 
c.curDecoder.(encoding.ByteArrayDecoder),

Review Comment:
   ```diff
   From eec53776b4aba9af1011a5a419472768c7c4dd0e Mon Sep 17 00:00:00 2001
   From: Ruihao Chen <joeche...@gmail.com>
   Date: Fri, 29 Aug 2025 12:41:17 -0400
   Subject: [PATCH] Add simple test case
   
   Signed-off-by: Ruihao Chen <joeche...@gmail.com>
   ---
    parquet/file/column_writer_test.go | 36 ++++++++++++++++++++++++++++++
    1 file changed, 36 insertions(+)
   
   diff --git a/parquet/file/column_writer_test.go 
b/parquet/file/column_writer_test.go
   index 90b239e4..3670de55 100644
   --- a/parquet/file/column_writer_test.go
   +++ b/parquet/file/column_writer_test.go
   @@ -56,6 +56,41 @@ const (
        DictionaryPageSize = 1024 * 1024
    )
    
   +type simpleAllocator struct {
   +    q chan []byte
   +}
   +
   +func (a *simpleAllocator) Allocate(n int) []byte {
   +    l := len(a.q)
   +    for range l {
   +            b := <-a.q
   +            if cap(b) >= n {
   +                    return b[:n]
   +            }
   +            a.q <- b
   +    }
   +    return make([]byte, n)
   +}
   +
   +func (a *simpleAllocator) Free(b []byte) {
   +    if b == nil {
   +            return
   +    }
   +    select {
   +    case a.q <- b:
   +    default: // discard if full
   +    }
   +}
   +
   +func (a *simpleAllocator) Reallocate(n int, old []byte) []byte {
   +    a.Free(old)
   +    return a.Allocate(n)
   +}
   +
   +func newSimpleAllocator() *simpleAllocator {
   +    return &simpleAllocator{q: make(chan []byte, 64)}
   +}
   +
    type mockpagewriter struct {
        mock.Mock
    }
   @@ -258,6 +293,7 @@ func (p *PrimitiveWriterTestSuite) TearDownTest() {
    }
    
    func (p *PrimitiveWriterTestSuite) buildReader(nrows int64, compression 
compress.Compression) file.ColumnChunkReader {
   +    mem := newSimpleAllocator()
        p.readbuffer = p.sink.Finish()
        pagereader, _ := 
file.NewPageReader(arrutils.NewByteReader(p.readbuffer.Bytes()), nrows, 
compression, mem, nil)
        return file.NewColumnReader(p.descr, pagereader, mem, &p.bufferPool)
   -- 
   2.47.3
   
   
   ```
   
   I've written a custom allocator (added by the patch above) that can make 
the unit tests in column_writer_test fail on the main branch.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to