comphead commented on PR #3052:
URL:
https://github.com/apache/datafusion-comet/pull/3052#issuecomment-3829210567
I'm seeing issues like
```
Arrow error: Invalid argument error: Invalid date arithmetic operation: Date32 - Int8
```
Checking whether this can be addressed in Comet or in DataFusion.
Created a native test to reproduce it:
```
/// Reproduces the `Date32 - Int8` failure for a DATE column minus a TINYINT column.
///
/// Builds a `Scan` over a (DATE, INT8) schema, wraps it in a `Projection` that evaluates
/// `date_col - int8_col`, executes the native plan on a single three-row batch, and asserts
/// that the first result produced by the stream is an error mentioning either the cast or
/// the date arithmetic operation.
#[test]
fn test_date_sub_with_int8_cast_error() {
    use arrow::array::Date32Array;

    let session_ctx = SessionContext::new();
    let task_ctx = session_ctx.task_ctx();
    let planner = PhysicalPlanner::new(Arc::from(session_ctx), 0);

    // Create a scan operator with Date32 (DATE) and Int8 (TINYINT) columns.
    // This simulates the schema from the Scala test where _20 is DATE and _2 is TINYINT.
    let op_scan = Operator {
        plan_id: 0,
        children: vec![],
        op_struct: Some(OpStruct::Scan(spark_operator::Scan {
            fields: vec![
                spark_expression::DataType {
                    type_id: 12, // DATE (Date32)
                    type_info: None,
                },
                spark_expression::DataType {
                    type_id: 1, // INT8 (TINYINT)
                    type_info: None,
                },
            ],
            source: "test".to_string(),
            arrow_ffi_safe: false,
        })),
    };

    // Create bound reference for the DATE column (index 0).
    let date_col = spark_expression::Expr {
        expr_struct: Some(Bound(spark_expression::BoundReference {
            index: 0,
            datatype: Some(spark_expression::DataType {
                type_id: 12, // DATE
                type_info: None,
            }),
        })),
    };

    // Create bound reference for the INT8 column (index 1).
    let int8_col = spark_expression::Expr {
        expr_struct: Some(Bound(spark_expression::BoundReference {
            index: 1,
            datatype: Some(spark_expression::DataType {
                type_id: 1, // INT8
                type_info: None,
            }),
        })),
    };

    // Create a Subtract expression: date_col - int8_col.
    // This is equivalent to the SQL: SELECT _20 - _2 FROM tbl (date_sub operation).
    // In the protobuf, subtract uses the MathExpr type.
    let subtract_expr = spark_expression::Expr {
        expr_struct: Some(ExprStruct::Subtract(Box::new(spark_expression::MathExpr {
            left: Some(Box::new(date_col)),
            right: Some(Box::new(int8_col)),
            return_type: Some(spark_expression::DataType {
                type_id: 12, // DATE - result should be DATE
                type_info: None,
            }),
            eval_mode: 0, // Legacy mode
        }))),
    };

    // Create a projection operator with the subtract expression.
    let projection = Operator {
        children: vec![op_scan],
        plan_id: 1,
        op_struct: Some(OpStruct::Projection(spark_operator::Projection {
            project_list: vec![subtract_expr],
        })),
    };

    // Create the physical plan.
    let (mut scans, datafusion_plan) = planner.create_plan(&projection, &mut vec![], 1).unwrap();

    // Execute the plan with test data.
    let mut stream = datafusion_plan.native_plan.execute(0, task_ctx).unwrap();

    let runtime = tokio::runtime::Runtime::new().unwrap();
    let (tx, mut rx) = mpsc::channel(1);

    // Send test data: Date32 values and Int8 values, followed by EOF.
    runtime.spawn(async move {
        // Create Date32 array (days since epoch); 19000 days = approximately 2022-01-01.
        let date_array = Date32Array::from(vec![Some(19000), Some(19001), Some(19002)]);
        // Create Int8 array.
        let int8_array = Int8Array::from(vec![Some(1i8), Some(2i8), Some(3i8)]);

        let input_batch1 =
            InputBatch::Batch(vec![Arc::new(date_array), Arc::new(int8_array)], 3);
        let input_batch2 = InputBatch::EOF;

        let batches = vec![input_batch1, input_batch2];
        for batch in batches {
            tx.send(batch).await.unwrap();
        }
    });

    // Execute and expect the cast error.
    runtime.block_on(async move {
        loop {
            // Hand the next input batch to the scan before polling the stream again.
            let batch = rx.recv().await.unwrap();
            scans[0].set_input_batch(batch);

            match poll!(stream.next()) {
                Poll::Ready(Some(result)) => {
                    // We expect an error here related to date arithmetic with Int8.
                    // The error can be either:
                    // - "Cast error: Casting from Int8 to Date32 not supported"
                    //   (from Spark/Comet cast)
                    // - "Invalid date arithmetic operation: Date32 - Int8" (from Arrow)
                    assert!(
                        result.is_err(),
                        "Expected error for date - int8 operation but got success: {:?}",
                        result
                    );

                    let err = result.unwrap_err();
                    let err_msg = err.to_string();
                    println!("{}", &err_msg);

                    assert!(
                        err_msg.contains("Cast error")
                            || err_msg.contains("Casting from Int8 to Date32")
                            || err_msg.contains("Invalid date arithmetic operation")
                            || err_msg.contains("Date32 - Int8"),
                        "Expected date arithmetic error message but got: {}",
                        err_msg
                    );
                    break;
                }
                Poll::Ready(None) => {
                    panic!("Stream ended without producing expected error");
                }
                // No output yet: go around the loop and supply the next input batch.
                Poll::Pending => {}
            }
        }
    });
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]