joechenrh commented on code in PR #485: URL: https://github.com/apache/arrow-go/pull/485#discussion_r2310638476
########## parquet/file/column_reader.go: ########## @@ -437,16 +446,27 @@ func (c *columnChunkReader) initDataDecoder(page Page, lvlByteLen int64) error { format.Encoding_DELTA_LENGTH_BYTE_ARRAY, format.Encoding_DELTA_BINARY_PACKED, format.Encoding_BYTE_STREAM_SPLIT: - c.curDecoder = c.decoderTraits.Decoder(parquet.Encoding(encoding), c.descr, false, c.mem) - c.decoders[encoding] = c.curDecoder + c.curDecoder = c.decoderTraits.Decoder(parquet.Encoding(enc), c.descr, false, c.mem) + c.decoders[enc] = c.curDecoder case format.Encoding_RLE_DICTIONARY: return errors.New("parquet: dictionary page must be before data page") default: - return fmt.Errorf("parquet: unknown encoding type %s", encoding) + return fmt.Errorf("parquet: unknown encoding type %s", enc) + } + } + + switch c.descr.PhysicalType() { + case parquet.Types.FixedLenByteArray: + c.curDecoder = &encoding.FixedLenByteArrayDecoderWrapper{ + FixedLenByteArrayDecoder: c.curDecoder.(encoding.FixedLenByteArrayDecoder), + } + case parquet.Types.ByteArray: + c.curDecoder = &encoding.ByteArrayDecoderWrapper{ + ByteArrayDecoder: c.curDecoder.(encoding.ByteArrayDecoder), Review Comment: ```diff From eec53776b4aba9af1011a5a419472768c7c4dd0e Mon Sep 17 00:00:00 2001 From: Ruihao Chen <joeche...@gmail.com> Date: Fri, 29 Aug 2025 12:41:17 -0400 Subject: [PATCH] Add simple test case Signed-off-by: Ruihao Chen <joeche...@gmail.com> --- parquet/file/column_writer_test.go | 36 ++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/parquet/file/column_writer_test.go b/parquet/file/column_writer_test.go index 90b239e4..3670de55 100644 --- a/parquet/file/column_writer_test.go +++ b/parquet/file/column_writer_test.go @@ -56,6 +56,41 @@ const ( DictionaryPageSize = 1024 * 1024 ) +type simpleAllocator struct { + q chan []byte +} + +func (a *simpleAllocator) Allocate(n int) []byte { + l := len(a.q) + for range l { + b := <-a.q + if cap(b) >= n { + return b[:n] + } + a.q <- b + } + return make([]byte, n) +} + 
+func (a *simpleAllocator) Free(b []byte) { + if b == nil { + return + } + select { + case a.q <- b: + default: // discard if full + } +} + +func (a *simpleAllocator) Reallocate(n int, old []byte) []byte { + a.Free(old) + return a.Allocate(n) +} + +func newSimpleAllocator() *simpleAllocator { + return &simpleAllocator{q: make(chan []byte, 64)} +} + type mockpagewriter struct { mock.Mock } @@ -258,6 +293,7 @@ func (p *PrimitiveWriterTestSuite) TearDownTest() { } func (p *PrimitiveWriterTestSuite) buildReader(nrows int64, compression compress.Compression) file.ColumnChunkReader { + mem := newSimpleAllocator() p.readbuffer = p.sink.Finish() pagereader, _ := file.NewPageReader(arrutils.NewByteReader(p.readbuffer.Bytes()), nrows, compression, mem, nil) return file.NewColumnReader(p.descr, pagereader, mem, &p.bufferPool) -- 2.47.3 ``` I've crafted a custom allocator that can make the unit tests in column_writer_test fail on the main branch. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org