Ted-Jiang commented on code in PR #2011: URL: https://github.com/apache/arrow-rs/pull/2011#discussion_r914541733
########## parquet/src/file/serialized_reader.rs: ########## @@ -1098,11 +1105,292 @@ mod tests { let offset_indexes = metadata.offset_indexes().unwrap(); // only one row group assert_eq!(offset_indexes.len(), 1); - let offset_index = offset_indexes.get(0).unwrap(); - let page_offset = offset_index.get(0).unwrap(); + let offset_index = &offset_indexes[0]; + let page_offset = &offset_index[0][0]; assert_eq!(4, page_offset.offset); assert_eq!(152, page_offset.compressed_page_size); assert_eq!(0, page_offset.first_row_index); } + + #[test] + fn test_page_index_reader_all_type() { + let test_file = get_test_file("alltypes_tiny_pages_plain.parquet"); + let builder = ReadOptionsBuilder::new(); + //enable read page index + let options = builder.with_page_index().build(); + let reader_result = SerializedFileReader::new_with_options(test_file, options); + let reader = reader_result.unwrap(); + + // Test contents in Parquet metadata + let metadata = reader.metadata(); + assert_eq!(metadata.num_row_groups(), 1); + + let page_indexes = metadata.page_indexes().unwrap(); + let row_group_offset_indexes = &metadata.offset_indexes().unwrap()[0]; + + // only one row group + assert_eq!(page_indexes.len(), 1); + let row_group_metadata = metadata.row_group(0); + + //col0->id: INT32 UNCOMPRESSED DO:0 FPO:4 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 7299, num_nulls: 0] + if let Index::INT32(index) = &page_indexes[0][0] { + check_native_page_index( + index, + 325, + row_group_metadata + .column(0) + .statistics() + .unwrap() + .min_bytes(), + row_group_metadata + .column(0) + .statistics() + .unwrap() + .max_bytes(), + BoundaryOrder::Unordered, + ); + assert_eq!(row_group_offset_indexes[0].len(), 325); + } else { + unreachable!() + }; + //col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0] + if let Index::BOOLEAN(index) = &page_indexes[0][1] { + 
assert_eq!(index.indexes.len(), 82); + assert_eq!(row_group_offset_indexes[1].len(), 82); + } else { + unreachable!() + }; + //col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] + if let Index::INT32(index) = &page_indexes[0][2] { + check_native_page_index( + index, + 325, + row_group_metadata + .column(0) + .statistics() + .unwrap() + .min_bytes(), + row_group_metadata + .column(0) + .statistics() + .unwrap() + .max_bytes(), + BoundaryOrder::Ascending, + ); + assert_eq!(row_group_offset_indexes[2].len(), 325); + } else { + unreachable!() + }; + //col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] + if let Index::INT32(index) = &page_indexes[0][3] { + check_native_page_index( + index, + 325, + row_group_metadata + .column(0) + .statistics() + .unwrap() + .min_bytes(), + row_group_metadata + .column(0) + .statistics() + .unwrap() + .max_bytes(), + BoundaryOrder::Ascending, + ); + assert_eq!(row_group_offset_indexes[3].len(), 325); + } else { + unreachable!() + }; + //col5->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] + if let Index::INT32(index) = &page_indexes[0][4] { + check_native_page_index( + index, + 325, + row_group_metadata + .column(0) + .statistics() + .unwrap() + .min_bytes(), + row_group_metadata + .column(0) + .statistics() + .unwrap() + .max_bytes(), + BoundaryOrder::Ascending, + ); + assert_eq!(row_group_offset_indexes[4].len(), 325); + } else { + unreachable!() + }; + //col6->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, num_nulls: 0] + if let Index::INT64(index) = &page_indexes[0][5] { + //Todo row_group_metadata.column(0).statistics().unwrap().min_bytes() only return 4 bytes + check_native_page_index( Review 
Comment: I tried to use ``` row_group_metadata .column(0) .statistics() .unwrap() .min_bytes(), ``` to get the min value from a column chunk's metadata for a column of type `INT64`, but it returns only 4 bytes... -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org