alamb commented on code in PR #7890:
URL: https://github.com/apache/arrow-datafusion/pull/7890#discussion_r1367442510
##########
datafusion/core/src/datasource/physical_plan/avro.rs:
##########
@@ -420,7 +420,11 @@ mod tests {
statistics: Statistics::new_unknown(&file_schema),
file_schema,
limit: None,
- table_partition_cols: vec![("date".to_owned(), DataType::Utf8)],
+ table_partition_cols: vec![Field::new(
+ "date".to_owned(),
Review Comment:
```suggestion
"date",
```
##########
datafusion/core/src/datasource/physical_plan/csv.rs:
##########
@@ -871,7 +871,8 @@ mod tests {
let mut config = partitioned_csv_config(file_schema, file_groups)?;
// Add partition columns
- config.table_partition_cols = vec![("date".to_owned(),
DataType::Utf8)];
+ config.table_partition_cols =
+ vec![Field::new("date".to_owned(), DataType::Utf8, false)];
Review Comment:
```suggestion
vec![Field::new("date", DataType::Utf8, false)];
```
##########
datafusion/core/src/datasource/physical_plan/file_scan_config.rs:
##########
@@ -527,6 +526,38 @@ mod tests {
assert_eq!(col_indices, None);
}
+ #[test]
+ fn physical_plan_config_no_projection_tab_cols_as_field() {
+ let file_schema = aggr_test_schema();
+
+ // make a table_partition_col as a field
+ let table_partition_col = Field::new(
+ "date".to_owned(),
Review Comment:
You don't have to call `to_owned()` here -- `Field::new` will do it for you
```suggestion
"date",
```
##########
datafusion/core/src/datasource/physical_plan/file_scan_config.rs:
##########
@@ -748,6 +779,25 @@ mod tests {
projection: Option<Vec<usize>>,
statistics: Statistics,
table_partition_cols: Vec<(String, DataType)>,
+ ) -> FileScanConfig {
+ let table_partition_cols = table_partition_cols
+ .iter()
+ .map(|(name, dtype)| Field::new(name, dtype.clone(), false))
+ .collect::<Vec<_>>();
+
+ config_for_proj_with_field_tab_part(
+ file_schema,
+ projection,
+ statistics,
+ table_partition_cols,
+ )
+ }
+
+ fn config_for_proj_with_field_tab_part(
Review Comment:
I find this name confusing given the three letter abbreviations and I don't
think this is common elsewhere in the DataFusion codebase.
How about something like
```suggestion
fn config_for_projection_with_partition_fields(
```
Or maybe instead you could change `config_for_projection` to take
`table_partition_cols: Vec<Field>,` and make a function like
```rust
/// Convert all
fn partition_cols( table_partition_cols: Vec<(&str, DataType)>) ->
Vec<Field> {
table_partition_cols
.iter()
.map(|(name, dtype)| Field::new(name, dtype.clone(), false))
.collect::<Vec<_>>()
}
```
And then convert the call sites of `config_for_projection` to be
`config_for_projection(.., partition_cols(..))`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]