sdd commented on code in PR #515:
URL: https://github.com/apache/iceberg-rust/pull/515#discussion_r1715723742


##########
crates/iceberg/src/arrow/reader.rs:
##########
@@ -84,73 +98,113 @@ impl ArrowReaderBuilder {
 pub struct ArrowReader {
     batch_size: Option<usize>,
     file_io: FileIO,
+
+    /// the maximum number of data files that can be fetched at the same time
+    concurrency_limit_data_files: usize,
 }
 
 impl ArrowReader {
     /// Take a stream of FileScanTasks and reads all the files.
     /// Returns a stream of Arrow RecordBatches containing the data from the 
files
-    pub fn read(self, mut tasks: FileScanTaskStream) -> 
crate::Result<ArrowRecordBatchStream> {
+    pub fn read(self, tasks: FileScanTaskStream) -> 
Result<ArrowRecordBatchStream> {
         let file_io = self.file_io.clone();
-
-        Ok(try_stream! {
-            while let Some(task_result) = tasks.next().await {
-                match task_result {
-                    Ok(task) => {
-                        // Collect Parquet column indices from field ids
-                        let mut collector = CollectFieldIdVisitor {
-                            field_ids: HashSet::default(),
-                        };
-                        if let Some(predicates) = task.predicate() {
-                            visit(&mut collector, predicates)?;
+        let batch_size = self.batch_size;
+        let max_concurrent_fetching_datafiles = 
self.concurrency_limit_data_files;
+
+        let (tx, rx) = channel(10);

Review Comment:
   In the testing that I've done so far, I found that changing this number 
made very little difference, but you're right, I think 
`concurrency_limit_data_files` makes more sense as the default. I will change 
it.
   
   I think once we have added some tracing / telemetry, we'll be able to build 
up better knowledge of how to tune these values.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to