zhuqi-lucas commented on code in PR #18160:
URL: https://github.com/apache/datafusion/pull/18160#discussion_r2465886794
##########
datafusion/core/tests/datasource/object_store_access.rs:
##########
@@ -120,24 +124,132 @@ async fn query_multi_csv_file() {
}
#[tokio::test]
-async fn create_single_parquet_file() {
+async fn create_single_parquet_file_default() {
+ // The default metadata size hint is 512KB
+ // which is enough to fetch the entire footer metadata and PageIndex
+ // in a single GET request.
+ let test = Test::new().with_single_file_parquet().await;
+ // expect 1 get request which reads the footer metadata and page index
assert_snapshot!(
- single_file_parquet_test().await.requests(),
+ test.requests(),
+ @r"
+ RequestCountingObjectStore()
+ Total Requests: 2
+ - HEAD path=parquet_table.parquet
+ - GET (range) range=0-2994 path=parquet_table.parquet
+ "
+ );
+}
+
+#[tokio::test]
+async fn create_single_parquet_file_prefetch() {
+ // Explicitly specify a prefetch hint that is adequate for the footer and page index
+ let test = Test::new()
+ .with_parquet_metadata_size_hint(Some(1000))
+ .with_single_file_parquet()
+ .await;
+ // expect 1 1000 byte request which reads the footer metadata and page index
+ assert_snapshot!(
+ test.requests(),
+ @r"
+ RequestCountingObjectStore()
+ Total Requests: 2
+ - HEAD path=parquet_table.parquet
+ - GET (range) range=1994-2994 path=parquet_table.parquet
+ "
+ );
+}
+
+#[tokio::test]
+async fn create_single_parquet_file_too_small_prefetch() {
+ // configure a prefetch size that is too small to fetch the footer
+ // metadata
+ //
+ // Using the ranges from the test below (with no_prefetch),
+ // pick a number less than 730:
+ // --------
+ // 2986-2994: (8 bytes) footer + length
+ // 2264-2986: (722 bytes) footer metadata
+ let test = Test::new()
+ .with_parquet_metadata_size_hint(Some(500))
+ .with_single_file_parquet()
+ .await;
+ // expect three get requests:
+ // 1. read the footer (500 bytes per hint, not enough for the footer metadata)
+ // 2. Read the footer metadata
+ // 3. reads the PageIndex
+ assert_snapshot!(
+ test.requests(),
@r"
RequestCountingObjectStore()
Total Requests: 4
- HEAD path=parquet_table.parquet
- - GET (range) range=2986-2994 path=parquet_table.parquet
+ - GET (range) range=2494-2994 path=parquet_table.parquet
- GET (range) range=2264-2986 path=parquet_table.parquet
- GET (range) range=2124-2264 path=parquet_table.parquet
"
);
}
+#[tokio::test]
+async fn create_single_parquet_file_small_prefetch() {
+ // configure a prefetch size that is large enough for the footer
+ // metadata but **not** the PageIndex
+ //
+ // Using the ranges from the test below (with no_prefetch),
+ // the 730 is determined as follows;
+ // --------
+ // 2986-2994: (8 bytes) footer + length
+ // 2264-2986: (722 bytes) footer metadata
+ let test = Test::new()
+ // 740 is enough to get both the footer + length (8 bytes)
+ // but not the entire PageIndex
+ .with_parquet_metadata_size_hint(Some(740))
+ .with_single_file_parquet()
+ .await;
+ // expect two get requests:
Review Comment:
Nice!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]