emkornfield commented on code in PR #14964:
URL: https://github.com/apache/arrow/pull/14964#discussion_r1083088746


##########
cpp/src/parquet/page_index.cc:
##########
@@ -184,8 +185,219 @@ class OffsetIndexImpl : public OffsetIndex {
   std::vector<PageLocation> page_locations_;
 };
 
+class RowGroupPageIndexReaderImpl : public RowGroupPageIndexReader {
+ public:
+  /// Build a page index reader for a single row group.
+  ///
+  /// Stores the input file pointer, the row group metadata, the reader
+  /// properties and the file decryptor, and computes the page index read
+  /// ranges of the row group up front via
+  /// PageIndexReader::DeterminePageIndexRangesInRowGroup().
+  ///
+  /// NOTE(review): `row_group_ordinal` is accepted but is not referenced by
+  /// the initializer list or the (empty) constructor body shown here.
+  /// NOTE(review): `input` is kept as a raw pointer; presumably the caller
+  /// keeps the file alive for the lifetime of this reader -- confirm.
+  RowGroupPageIndexReaderImpl(::arrow::io::RandomAccessFile* input,
+                              std::shared_ptr<RowGroupMetaData> 
row_group_metadata,
+                              const ReaderProperties& properties,
+                              int32_t row_group_ordinal,
+                              std::shared_ptr<InternalFileDecryptor> 
file_decryptor)
+      : input_(input),
+        row_group_metadata_(std::move(row_group_metadata)),
+        properties_(properties),
+        file_decryptor_(std::move(file_decryptor)),
+        // NOTE(review): this dereferences the member row_group_metadata_ (the
+        // parameter of the same name was moved from above), so it relies on
+        // row_group_metadata_ being declared before index_read_range_ --
+        // confirm against the member declarations.
+        index_read_range_(
+            
PageIndexReader::DeterminePageIndexRangesInRowGroup(*row_group_metadata_)) {}
+
+  /// Read column index of a column chunk.
+  ///
+  /// The bytes covering the column index read range of the row group are
+  /// fetched from the input on the first call that needs them and kept in
+  /// column_index_buffer_; the requested column index is then deserialized
+  /// from its slice of that buffer.
+  ///
+  /// \param i ordinal of the column chunk within this row group.
+  /// \return the deserialized column index, or nullptr if the column chunk
+  /// metadata records no column index location.
+  /// \throws ParquetException if `i` is out of range, if the row group has no
+  /// column index read range, if the recorded location does not lie inside
+  /// that read range, or if deserialization fails. ParquetException::NYI is
+  /// invoked for an encrypted column chunk when no file decryptor is set.
+  std::shared_ptr<ColumnIndex> GetColumnIndex(int32_t i) override {
+    if (i < 0 || i >= row_group_metadata_->num_columns()) {
+      // The ordinal is passed as its own argument: the exception message is
+      // expected to be built by concatenating the arguments, so a "{}"
+      // placeholder would be emitted verbatim instead of being substituted.
+      throw ParquetException("Invalid column ", i, " to get column index");
+    }
+
+    auto col_chunk = row_group_metadata_->ColumnChunk(i);
+
+    // NOTE(review): nothing below uses file_decryptor_ to decrypt the index
+    // bytes, so an encrypted column with a decryptor present falls through to
+    // plain deserialization -- confirm whether this guard should reject every
+    // encrypted column chunk.
+    std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = col_chunk->crypto_metadata();
+    if (crypto_metadata != nullptr && file_decryptor_ == nullptr) {
+      ParquetException::NYI("Cannot read encrypted column index yet");
+    }
+
+    auto column_index_location = col_chunk->GetColumnIndexLocation();
+    if (!column_index_location.has_value()) {
+      return nullptr;
+    }
+
+    if (!index_read_range_.column_index.has_value()) {
+      throw ParquetException("Missing column index read range");
+    }
+
+    // Lazily read the whole column index range of this row group once.
+    if (column_index_buffer_ == nullptr) {
+      PARQUET_ASSIGN_OR_THROW(column_index_buffer_,
+                              input_->ReadAt(index_read_range_.column_index->offset,
+                                             index_read_range_.column_index->length));
+    }
+
+    // Validate the location at runtime before narrowing the length and before
+    // doing pointer arithmetic: the values come from file metadata, and debug
+    // only assertions would leave release builds unprotected against a
+    // corrupt file.
+    const int64_t buffer_offset =
+        column_index_location->offset - index_read_range_.column_index->offset;
+    const int64_t length = static_cast<int64_t>(column_index_location->length);
+    const int64_t range_length =
+        static_cast<int64_t>(index_read_range_.column_index->length);
+    // `range_length - length` cannot overflow once `length >= 0` is known.
+    if (buffer_offset < 0 || length < 0 || length > UINT32_MAX ||
+        buffer_offset > range_length - length) {
+      throw ParquetException("Column index location of column ", i,
+                             " is outside of the column index read range");
+    }
+
+    auto buffer = column_index_buffer_.get();
+    auto descr = row_group_metadata_->schema()->Column(i);
+    std::shared_ptr<ColumnIndex> column_index;
+    try {
+      column_index = ColumnIndex::Make(*descr, buffer->data() + buffer_offset,
+                                       static_cast<uint32_t>(length), properties_);
+    } catch (...) {
+      throw ParquetException("Cannot deserialize column index for column ", i);
+    }
+    return column_index;
+  }
+
+  /// Read offset index of a column chunk.
+  std::shared_ptr<OffsetIndex> GetOffsetIndex(int32_t i) override {
+    if (i < 0 || i >= row_group_metadata_->num_columns()) {
+      throw ParquetException("Invalid column {} to get offset index", i);
+    }
+
+    auto col_chunk = row_group_metadata_->ColumnChunk(i);
+
+    std::unique_ptr<ColumnCryptoMetaData> crypto_metadata = 
col_chunk->crypto_metadata();
+    if (crypto_metadata != nullptr && file_decryptor_ == nullptr) {
+      ParquetException::NYI("Cannot read encrypted offset index yet");
+    }
+
+    auto offset_index_location = col_chunk->GetOffsetIndexLocation();
+    if (!offset_index_location.has_value()) {
+      return nullptr;
+    }
+
+    if (!index_read_range_.offset_index.has_value()) {
+      throw ParquetException("Missing column index read range");
+    }
+
+    if (offset_index_buffer_ == nullptr) {
+      PARQUET_ASSIGN_OR_THROW(offset_index_buffer_,
+                              
input_->ReadAt(index_read_range_.offset_index->offset,
+                                             
index_read_range_.offset_index->length));
+    }
+
+    auto buffer = offset_index_buffer_.get();
+    int64_t buffer_offset =
+        offset_index_location->offset - index_read_range_.offset_index->offset;
+    uint32_t length = static_cast<uint32_t>(offset_index_location->length);

Review Comment:
   same comment as above on casts.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to