Ted-Jiang commented on code in PR #2011:
URL: https://github.com/apache/arrow-rs/pull/2011#discussion_r914541733


##########
parquet/src/file/serialized_reader.rs:
##########
@@ -1098,11 +1105,292 @@ mod tests {
         let offset_indexes = metadata.offset_indexes().unwrap();
         // only one row group
         assert_eq!(offset_indexes.len(), 1);
-        let offset_index = offset_indexes.get(0).unwrap();
-        let page_offset = offset_index.get(0).unwrap();
+        let offset_index = &offset_indexes[0];
+        let page_offset = &offset_index[0][0];
 
         assert_eq!(4, page_offset.offset);
         assert_eq!(152, page_offset.compressed_page_size);
         assert_eq!(0, page_offset.first_row_index);
     }
+
+    #[test]
+    fn test_page_index_reader_all_type() {
+        let test_file = get_test_file("alltypes_tiny_pages_plain.parquet");
+        let builder = ReadOptionsBuilder::new();
+        //enable read page index
+        let options = builder.with_page_index().build();
+        let reader_result = SerializedFileReader::new_with_options(test_file, 
options);
+        let reader = reader_result.unwrap();
+
+        // Test contents in Parquet metadata
+        let metadata = reader.metadata();
+        assert_eq!(metadata.num_row_groups(), 1);
+
+        let page_indexes = metadata.page_indexes().unwrap();
+        let row_group_offset_indexes = &metadata.offset_indexes().unwrap()[0];
+
+        // only one row group
+        assert_eq!(page_indexes.len(), 1);
+        let row_group_metadata = metadata.row_group(0);
+
+        //col0->id: INT32 UNCOMPRESSED DO:0 FPO:4 SZ:37325/37325/1.00 VC:7300 
ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 7299, num_nulls: 0]
+        if let Index::INT32(index) = &page_indexes[0][0] {
+            check_native_page_index(
+                index,
+                325,
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .min_bytes(),
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .max_bytes(),
+                BoundaryOrder::Unordered,
+            );
+            assert_eq!(row_group_offset_indexes[0].len(), 325);
+        } else {
+            unreachable!()
+        };
+        //col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 
VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0]
+        if let Index::BOOLEAN(index) = &page_indexes[0][1] {
+            assert_eq!(index.indexes.len(), 82);
+            assert_eq!(row_group_offset_indexes[1].len(), 82);
+        } else {
+            unreachable!()
+        };
+        //col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 
SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, 
num_nulls: 0]
+        if let Index::INT32(index) = &page_indexes[0][2] {
+            check_native_page_index(
+                index,
+                325,
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .min_bytes(),
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .max_bytes(),
+                BoundaryOrder::Ascending,
+            );
+            assert_eq!(row_group_offset_indexes[2].len(), 325);
+        } else {
+            unreachable!()
+        };
+        //col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 
SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, 
num_nulls: 0]
+        if let Index::INT32(index) = &page_indexes[0][3] {
+            check_native_page_index(
+                index,
+                325,
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .min_bytes(),
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .max_bytes(),
+                BoundaryOrder::Ascending,
+            );
+            assert_eq!(row_group_offset_indexes[3].len(), 325);
+        } else {
+            unreachable!()
+        };
+        //col5->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 
SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, 
num_nulls: 0]
+        if let Index::INT32(index) = &page_indexes[0][4] {
+            check_native_page_index(
+                index,
+                325,
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .min_bytes(),
+                row_group_metadata
+                    .column(0)
+                    .statistics()
+                    .unwrap()
+                    .max_bytes(),
+                BoundaryOrder::Ascending,
+            );
+            assert_eq!(row_group_offset_indexes[4].len(), 325);
+        } else {
+            unreachable!()
+        };
+        //col6->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 
SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, 
num_nulls: 0]
+        if let Index::INT64(index) = &page_indexes[0][5] {
+            //Todo 
row_group_metadata.column(0).statistics().unwrap().min_bytes() only return 4 
bytes
+            check_native_page_index(

Review Comment:
   I tried to use 
   ```
   row_group_metadata
                       .column(0)
                       .statistics()
                       .unwrap()
                       .min_bytes(),
   ```
   to get the min value from a column chunk's metadata for an `Int64` column, but it 
returns only 4 bytes...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to