debajyoti-truefoundry opened a new issue, #16684: URL: https://github.com/apache/datafusion/issues/16684
### Describe the bug I am on datafusion 47. ```rust use arrow::array::Int64Array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use datafusion::prelude::*; use parquet::arrow::arrow_writer::ArrowWriter; use parquet::file::properties::WriterProperties; use std::fs::File; use std::sync::Arc; use tempfile::TempDir; #[tokio::main] async fn main() -> Result<(), Box<dyn std::error::Error>> { println!("Creating parquet file with sample data..."); let temp_dir = TempDir::new()?; let parquet_path = temp_dir.path().join("sample_data.parquet"); let ids = Int64Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let parent_ids = Int64Array::from(vec![ Some(0), Some(1), Some(1), Some(2), Some(2), Some(3), Some(4), Some(5), Some(6), Some(7), ]); let values = Int64Array::from(vec![10, 20, 30, 40, 50, 60, 70, 80, 90, 100]); let schema = Arc::new(Schema::new(vec![ Field::new("id", DataType::Int64, false), Field::new("parent_id", DataType::Int64, true), Field::new("value", DataType::Int64, false), ])); let record_batch = RecordBatch::try_new( schema.clone(), vec![Arc::new(ids), Arc::new(parent_ids), Arc::new(values)], )?; let file = File::create(&parquet_path)?; let props = WriterProperties::builder().build(); let mut writer = ArrowWriter::try_new(file, schema, Some(props))?; writer.write(&record_batch)?; writer.close()?; println!("Parquet file created at: {:?}", parquet_path); let ctx = SessionContext::new(); ctx.register_parquet( "hierarchy", parquet_path.to_str().unwrap(), ParquetReadOptions::default(), ) .await?; println!("\nOriginal data:"); let df = ctx.sql("SELECT * FROM hierarchy ORDER BY id").await?; df.show().await?; let recursive_query = " EXPLAIN ANALYZE WITH RECURSIVE number_series AS ( SELECT id, 1 as level FROM hierarchy WHERE id = 1 UNION ALL SELECT ns.id + 1, ns.level + 1 FROM number_series ns WHERE ns.id < 10 ) SELECT * FROM number_series ORDER BY id "; let recursive_df = ctx.sql(recursive_query).await?; 
recursive_df.show().await?; Ok(()) } ``` ``` DataSourceExec: file_groups={1 group: [[var/folders/6z/kt4t6jkd4ss1_fj16dv_05xc0000gn/T/.tmpOjZiaN/sample_data.parquet]]}, projection=[id, parent_id, value], file_type=parquet, predicate=id@0 = 1, pruning_predicate=id_null_count@2 != row_count@3 AND id_min@0 <= 1 AND 1 <= id_max@1, required_guarantees=[id in (1)], metrics=[output_rows=10, elapsed_compute=1ns, bytes_scanned=565, file_open_errors=0, file_scan_errors=0, num_predicate_creation_errors=0, page_index_rows_matched=10, page_index_rows_pruned=0, predicate_evaluation_errors=0, pushdown_rows_matched=0, pushdown_rows_pruned=0, row_groups_matched_bloom_filter=0, row_groups_matched_statistics=1, row_groups_pruned_bloom_filter=0, row_groups_pruned_statistics=0, bloom_filter_eval_time=149.084µs, metadata_load_time=483.918µs, page_index_eval_time=124.959µs, row_pushdown_eval_time=2ns, statistics_eval_time=336.959µs, time_elapsed_opening=1.14175ms, time_elapsed_processing=1.198ms, time_elapsed_scanning_total=256.125µs, time_elapsed_scanning_until_data=232.5µs] ``` On `projection=[id, parent_id, value]`, why are we reading `value`, if `value` is unused in the query? ### To Reproduce _No response_ ### Expected behavior _No response_ ### Additional context _No response_ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org