zhuqi-lucas commented on issue #16838:
URL: https://github.com/apache/datafusion/issues/16838#issuecomment-3112288717

   > Thanks [@zhuqi-lucas](https://github.com/zhuqi-lucas). Our scenario is 
`list(struct{})`, and the inner fields of the struct are like:
   > 
   > let schema = Arc::new(Schema::new(vec![
   >     Field::new("col1", DataType::Utf8, false),
   >     Field::new("col2", DataType::Int32, false),
   >     Field::new(
   >         "col3",
   >         DataType::Timestamp(TimeUnit::Nanosecond, None),
   >         false,
   >     ),
   >     Field::new("col4", DataType::Int64, false),
   >     Field::new(
   >         "col5",
   >         DataType::List(Arc::new(Field::new("item1", DataType::Int32, 
true))),
   >         true,
   >     ),
   >     Field::new("col6", DataType::Float64, false),
   >     Field::new("col7", DataType::UInt32, true),
   >     Field::new("col8", DataType::UInt32, true),
   >     Field::new("col9", DataType::Int32, true),
   >     Field::new("col10", DataType::Utf8, false),
   >     Field::new("col11", DataType::Float64, false),
   > ]));
   
   But I can't reproduce the concat_fallback path with the above schema:
   
   ```rust
   {
           let batch_size = 8192;
           let batch_count = 2;
   
           let arrays = (0..batch_count)
               .map(|_| {
                   let col1 = Arc::new(create_string_array::<i32>(batch_size, 
0.0)) as ArrayRef;
                   let col2 = 
Arc::new(create_primitive_array::<Int32Type>(batch_size, 0.0)) as ArrayRef;
                   let col3 = 
Arc::new(create_primitive_array::<TimestampNanosecondType>(batch_size, 0.0)) as 
ArrayRef;
                   let col4 = 
Arc::new(create_primitive_array::<Int64Type>(batch_size, 0.0)) as ArrayRef;
   
   
                  
                   let total_values = batch_size * 1024;
                   let values: ArrayRef = 
Arc::new(create_primitive_array::<Int32Type>(total_values, 0.0));
                   let offsets = 
OffsetBuffer::from_lengths(std::iter::repeat(1024).take(batch_size));
                   
                   let field = Arc::new(Field::new("item1", DataType::Int32, 
true));
   
                   let list_array = ListArray::try_new(
                       field,
                       offsets,
                       values.clone(),
                       None,  
                   ).unwrap();
                   
                   assert_eq!(list_array.len(), batch_size);
   
                   let col5 = Arc::new(list_array) as ArrayRef;
   
                   println!("col5 datatype = {:?}", col5.data_type());
   
                   let col6 = 
Arc::new(create_primitive_array::<Float64Type>(batch_size, 0.0)) as ArrayRef;
                   let col7 = 
Arc::new(create_primitive_array::<UInt32Type>(batch_size, 0.2)) as ArrayRef;
                   let col8 = 
Arc::new(create_primitive_array::<UInt32Type>(batch_size, 0.2)) as ArrayRef;
                   let col9 = 
Arc::new(create_primitive_array::<Int32Type>(batch_size, 0.2)) as ArrayRef;
                   let col10 = Arc::new(create_string_array::<i32>(batch_size, 
0.0)) as ArrayRef;
                   let col11 = 
Arc::new(create_primitive_array::<Float64Type>(batch_size, 0.0)) as ArrayRef;
   
                   let fields = vec![
                       Field::new("col1", col1.data_type().clone(), false),
                       Field::new("col2", col2.data_type().clone(), false),
                       Field::new("col3", col3.data_type().clone(), false),
                       Field::new("col4", col4.data_type().clone(), false),
                       Field::new(
                           "col5",
                           DataType::List(Arc::new(Field::new("item1", 
DataType::Int32, true))),
                           true,
                       ),
                       Field::new("col6", col6.data_type().clone(), false),
                       Field::new("col7", col7.data_type().clone(), true),
                       Field::new("col8", col8.data_type().clone(), true),
                       Field::new("col9", col9.data_type().clone(), true),
                       Field::new("col10", col10.data_type().clone(), false),
                       Field::new("col11", col11.data_type().clone(), false),
                   ];
   
                   StructArray::try_new(
                       fields.clone().into(),
                       vec![col1, col2, col3, col4, col5, col6, col7, col8, 
col9, col10, col11],
                       None,
                   ).unwrap()
               })
               .collect::<Vec<_>>();
   
           let array_refs = arrays.iter().map(|a| a as &dyn 
Array).collect::<Vec<_>>();
   
           c.bench_function("concat complex struct 1024 × 2", |b| {
               b.iter(|| bench_concat_arrays(&array_refs))
           });
       }
   ```
   
   It only reproduces with FixedSizeListArray, because FixedSizeListArray 
does not have a matching case; DataType::List, however, should not go to 
concat_fallback, so this is still confusing to me.


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to