alamb commented on code in PR #6948:
URL: https://github.com/apache/arrow-rs/pull/6948#discussion_r1907865637
##########
parquet/src/arrow/async_reader/mod.rs:
##########
@@ -15,65 +15,13 @@
// specific language governing permissions and limitations
// under the License.
-//! Provides `async` API for reading parquet files as
+//! [`ParquetRecordBatchStreamBuilder`]: `async` API for reading Parquet files as
//! [`RecordBatch`]es
//!
-//! ```
-//! # #[tokio::main(flavor="current_thread")]
-//! # async fn main() {
-//! #
-//! # use arrow_array::RecordBatch;
-//! # use arrow::util::pretty::pretty_format_batches;
-//! # use futures::TryStreamExt;
-//! # use tokio::fs::File;
-//! #
-//! # use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask};
-//! #
-//! # fn assert_batches_eq(batches: &[RecordBatch], expected_lines: &[&str]) {
-//! # let formatted = pretty_format_batches(batches).unwrap().to_string();
-//! # let actual_lines: Vec<_> = formatted.trim().lines().collect();
-//! # assert_eq!(
-//! # &actual_lines, expected_lines,
-//! # "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n",
-//! # expected_lines, actual_lines
-//! # );
-//! # }
-//! #
-//! let testdata = arrow::util::test_util::parquet_test_data();
-//! let path = format!("{}/alltypes_plain.parquet", testdata);
-//! let file = File::open(path).await.unwrap();
+//! This can be used to decode a Parquet file in streaming fashion (without
+//! downloading the whole file at once) from a remote source, such as an object store.
//!
-//! let builder = ParquetRecordBatchStreamBuilder::new(file)
-//! .await
-//! .unwrap()
-//! .with_batch_size(3);
-//!
-//! let file_metadata = builder.metadata().file_metadata();
-//! let mask = ProjectionMask::roots(file_metadata.schema_descr(), [1, 2, 6]);
-//!
-//! let stream = builder.with_projection(mask).build().unwrap();
-//! let results = stream.try_collect::<Vec<_>>().await.unwrap();
-//! assert_eq!(results.len(), 3);
-//!
-//! assert_batches_eq(
-//! &results,
-//! &[
-//! "+----------+-------------+-----------+",
-//! "| bool_col | tinyint_col | float_col |",
-//! "+----------+-------------+-----------+",
-//! "| true | 0 | 0.0 |",
-//! "| false | 1 | 1.1 |",
-//! "| true | 0 | 0.0 |",
-//! "| false | 1 | 1.1 |",
-//! "| true | 0 | 0.0 |",
-//! "| false | 1 | 1.1 |",
-//! "| true | 0 | 0.0 |",
-//! "| false | 1 | 1.1 |",
-//! "+----------+-------------+-----------+",
-//! ],
-//! );
-//! # }
-//! ```
+//! See example on [`ParquetRecordBatchStreamBuilder`]
Review Comment:
yes good call
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]