debajyoti-truefoundry opened a new issue, #16684:
URL: https://github.com/apache/datafusion/issues/16684

   ### Describe the bug
   
   I am on datafusion 47.
   
   ```rust
   use arrow::array::Int64Array;
   use arrow::datatypes::{DataType, Field, Schema};
   use arrow::record_batch::RecordBatch;
   use datafusion::prelude::*;
   use parquet::arrow::arrow_writer::ArrowWriter;
   use parquet::file::properties::WriterProperties;
   use std::fs::File;
   use std::sync::Arc;
   use tempfile::TempDir;
   
   /// Reproduction driver: writes a ten-row Parquet file into a temporary
   /// directory, registers it with DataFusion as the `hierarchy` table, prints
   /// the table contents, and then prints the `EXPLAIN ANALYZE` output of a
   /// recursive CTE whose SQL only references the `id` column.
   ///
   /// Every fallible step uses `?`, so the first failure is returned to the
   /// caller as a boxed error.
   #[tokio::main]
   async fn main() -> Result<(), Box<dyn std::error::Error>> {
       println!("Creating parquet file with sample data...");
   
       // The directory handle stays bound until the end of `main`
       // (presumably the directory is removed when it is dropped — confirm
       // against the `tempfile` docs).
       let scratch_dir = TempDir::new()?;
       let parquet_file = scratch_dir.path().join("sample_data.parquet");
   
       // Three Int64 columns; only `parent_id` is declared nullable.
       let table_schema = Arc::new(Schema::new(vec![
           Field::new("id", DataType::Int64, false),
           Field::new("parent_id", DataType::Int64, true),
           Field::new("value", DataType::Int64, false),
       ]));
   
       // id = 1..=10, value = 10 * id, parent_id = fixed list wrapped in `Some`.
       let id_column = Int64Array::from((1..=10).collect::<Vec<i64>>());
       let parent_column = Int64Array::from(
           vec![0, 1, 1, 2, 2, 3, 4, 5, 6, 7]
               .into_iter()
               .map(Some)
               .collect::<Vec<Option<i64>>>(),
       );
       let value_column = Int64Array::from((1..=10).map(|n| n * 10).collect::<Vec<i64>>());
   
       let batch = RecordBatch::try_new(
           Arc::clone(&table_schema),
           vec![
               Arc::new(id_column),
               Arc::new(parent_column),
               Arc::new(value_column),
           ],
       )?;
   
       // The file is created only after the batch has been built successfully.
       let mut parquet_writer = ArrowWriter::try_new(
           File::create(&parquet_file)?,
           table_schema,
           Some(WriterProperties::builder().build()),
       )?;
       parquet_writer.write(&batch)?;
       parquet_writer.close()?;
   
       println!("Parquet file created at: {:?}", parquet_file);
   
       let session = SessionContext::new();
       session
           .register_parquet(
               "hierarchy",
               parquet_file.to_str().unwrap(),
               ParquetReadOptions::default(),
           )
           .await?;
   
       println!("\nOriginal data:");
       session
           .sql("SELECT * FROM hierarchy ORDER BY id")
           .await?
           .show()
           .await?;
   
       // The anchor member of the CTE scans `hierarchy`; the SQL text names
       // only `id` from that table.
       let explain_sql = "
           EXPLAIN ANALYZE
           WITH RECURSIVE number_series AS (
               SELECT id, 1 as level
               FROM hierarchy 
               WHERE id = 1
               
               UNION ALL
               
               SELECT ns.id + 1, ns.level + 1
               FROM number_series ns
               WHERE ns.id < 10
           )
           SELECT * FROM number_series ORDER BY id
       ";
   
       session.sql(explain_sql).await?.show().await?;
   
       Ok(())
   }
   ```
   
   ```
   DataSourceExec: file_groups={1 group: 
[[var/folders/6z/kt4t6jkd4ss1_fj16dv_05xc0000gn/T/.tmpOjZiaN/sample_data.parquet]]},
 projection=[id, parent_id, value], file_type=parquet, predicate=id@0 = 1, 
pruning_predicate=id_null_count@2 != row_count@3 AND id_min@0 <= 1 AND 1 <= 
id_max@1, required_guarantees=[id in (1)], metrics=[output_rows=10, 
elapsed_compute=1ns, bytes_scanned=565, file_open_errors=0, file_scan_errors=0, 
num_predicate_creation_errors=0, page_index_rows_matched=10, 
page_index_rows_pruned=0, predicate_evaluation_errors=0, 
pushdown_rows_matched=0, pushdown_rows_pruned=0, 
row_groups_matched_bloom_filter=0, row_groups_matched_statistics=1, 
row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0, 
bloom_filter_eval_time=149.084µs, metadata_load_time=483.918µs, 
page_index_eval_time=124.959µs, row_pushdown_eval_time=2ns, 
statistics_eval_time=336.959µs, time_elapsed_opening=1.14175ms, 
time_elapsed_processing=1.198ms, time_elapsed_scanning_total=256.125µs, 
time_elapsed_scanning_until_data=232.5µs]
   ```
   
   In `projection=[id, parent_id, value]`, why are we reading `value` if 
`value` is unused in the query? (`parent_id` is not referenced by the query either.)
   
   ### To Reproduce
   
   _No response_
   
   ### Expected behavior
   
   _No response_
   
   ### Additional context
   
   _No response_


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to