sfc-gh-nthimmegowda commented on code in PR #14147: URL: https://github.com/apache/arrow/pull/14147#discussion_r974949651
########## cpp/src/parquet/reader_test.cc: ########## @@ -127,6 +127,89 @@ void CheckRowGroupMetadata(const RowGroupMetaData* rg_metadata, } } +class TestBooleanRLE : public ::testing::Test { + public: + void SetUp() { + reader_ = ParquetFileReader::OpenFile(data_file("rle_boolean_encoding.parquet")); + } + + void TearDown() {} + + protected: + std::unique_ptr<ParquetFileReader> reader_; +}; + +TEST_F(TestBooleanRLE, TestBooleanScanner) { + auto group = reader_->RowGroup(0); + + // column 0, id + auto scanner = std::make_shared<BoolScanner>(group->Column(0)); + + bool val = false; + bool is_null = false; + for (int i = 0; i < 8; i++) { + ASSERT_TRUE(scanner->HasNext()); + ASSERT_TRUE(scanner->NextValue(&val, &is_null)); + + // For this file, 3rd index value is null + if (i == 2) { + ASSERT_TRUE(is_null); + } else { + ASSERT_FALSE(is_null); + } + } + + ASSERT_FALSE(scanner->HasNext()); + ASSERT_FALSE(scanner->NextValue(&val, &is_null)); +} + +TEST_F(TestBooleanRLE, TestBatchRead) { + auto group = reader_->RowGroup(0); + + // column 0, id + auto col = std::dynamic_pointer_cast<BoolReader>(group->Column(0)); + + // This file only has 8 rows + ASSERT_EQ(8, reader_->metadata()->num_rows()); + // This file only has 1 row group + ASSERT_EQ(1, reader_->metadata()->num_row_groups()); + // Size of the metadata is 106 bytes + ASSERT_EQ(106, reader_->metadata()->size()); + // This row group must have 8 rows + ASSERT_EQ(8, group->metadata()->num_rows()); + + // Check if the column is encoded with RLE + auto col_chunk = group->metadata()->ColumnChunk(0); + ASSERT_TRUE(std::find(col_chunk->encodings().begin(), col_chunk->encodings().end(), + Encoding::RLE) != col_chunk->encodings().end()); + + // Assert column has values to be read + ASSERT_TRUE(col->HasNext()); + int64_t curr_batch_read = 0; + + const int16_t batch_size = 8; + int16_t def_levels[batch_size]; + int16_t rep_levels[batch_size]; + bool values[batch_size - 1]; + + auto levels_read = + col->ReadBatch(batch_size, def_levels, rep_levels, values, &curr_batch_read); + ASSERT_EQ(batch_size, levels_read); + + // Since one value is a null value, expect batches read to be one less than indicated + // batch_size + ASSERT_EQ(batch_size - 1, curr_batch_read); + + // 3rd index is null value + ASSERT_THAT(def_levels, testing::ElementsAre(1, 1, 0, 1, 1, 1, 1, 1)); + + // Validate inserted data is as expected + ASSERT_THAT(values, testing::ElementsAre(1, 0, 1, 1, 0, 0, 1)); Review Comment: Yes, the input has both `<bit-packed-run>` and `<rle-run>`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org