Ted-Jiang commented on code in PR #3625:
URL: https://github.com/apache/arrow-rs/pull/3625#discussion_r1090530412
##########
parquet/src/file/metadata.rs:
##########
@@ -50,7 +50,25 @@ use crate::schema::types::{
Type as SchemaType,
};
+/// [`Index`] for each row group of each column.
+///
+/// `column_index[row_group_number][column_number]` holds the
+/// [`Index`] corresponding to column `column_number` of row group
+/// `row_group_number`.
+///
+/// For example `column_index[2][3]` holds the [`Index`] for the fourth
+/// column in the third row group of the parquet file.
pub type ParquetColumnIndex = Vec<Vec<Index>>;
+
+/// [`PageLocation`] for each data page of each row group of each column.
+///
+/// `offset_index[row_group_number][column_number][page_number]` holds
+/// the [`PageLocation`] corresponding to page `page_number` of column
+/// `column_number` of row group `row_group_number`.
+///
+/// For example `offset_index[2][3][4]` holds the [`PageLocation`] for
Review Comment:
Nice write up! 👍
##########
parquet/src/file/page_index/index.rs:
##########
@@ -90,14 +103,17 @@ impl Index {
}
}
-/// An index of a column of [`Type`] physical representation
+/// Stores the [`PageIndex`] for each page of a column with [`Type`]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct NativeIndex<T: ParquetValueType> {
- /// The physical type
+ /// The physical type of this column
pub physical_type: Type,
/// The indexes, one item per page
pub indexes: Vec<PageIndex<T>>,
- /// the order
+ /// If the min/max elements are ordered, and if so in which
Review Comment:
This is the correct description.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]