mapleFU commented on code in PR #44043:
URL: https://github.com/apache/arrow/pull/44043#discussion_r1912695142
##########
cpp/src/parquet/file_reader.cc:
##########
@@ -267,12 +267,13 @@ class SerializedRowGroup : public
RowGroupReader::Contents {
ARROW_DCHECK_NE(data_decryptor, nullptr);
constexpr auto kEncryptedRowGroupsLimit = 32767;
- if (i > kEncryptedRowGroupsLimit) {
+ if (ARROW_PREDICT_FALSE(row_group_ordinal_ > kEncryptedRowGroupsLimit)) {
Review Comment:
The previous check actually checks the column_id, not the row-group ordinal.
##########
cpp/src/parquet/file_writer.cc:
##########
@@ -359,14 +359,26 @@ class FileSerializer : public ParquetFileWriter::Contents
{
if (row_group_writer_) {
row_group_writer_->Close();
}
+ int16_t row_group_ordinal = 0;
+ if (num_row_groups_ < std::numeric_limits<int16_t>::max()) {
+ row_group_ordinal = static_cast<int16_t>(num_row_groups_);
Review Comment:
This check is a bit ugly. Maybe we can also not write the row_group_ordinal
for un-encryption files
##########
cpp/src/parquet/metadata.cc:
##########
@@ -1854,7 +1854,9 @@ class
RowGroupMetaDataBuilder::RowGroupMetaDataBuilderImpl {
row_group_->__set_file_offset(file_offset);
row_group_->__set_total_compressed_size(total_compressed_size);
row_group_->__set_total_byte_size(total_bytes_written);
- row_group_->__set_ordinal(row_group_ordinal);
+ if (row_group_ordinal > 0) {
Review Comment:
By default, the `int16_t row_group_ordinal = -1`, but it's never set to
that...
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]