comphead commented on PR #3052:
URL: 
https://github.com/apache/datafusion-comet/pull/3052#issuecomment-3829210567

   I'm seeing issues like 
   ```
   Arrow error: Invalid argument error: Invalid date arithmetic operation: Date32 - Int8
   ```
   
   Checking if this can be addressed in Comet or in DataFusion
   
   
   Created a native test to reproduce the error:
   
   ```
   /// Reproduces the `Date32 - Int8` failure seen when subtracting a TINYINT
   /// column from a DATE column.
   ///
   /// The test plans a `Scan -> Projection` pipeline whose single projected
   /// expression is `Subtract(date_col, int8_col)`, feeds it one three-row batch
   /// followed by EOF, and asserts that the first item produced by the stream is
   /// an error whose message mentions the cast / date-arithmetic problem.
   #[test]
   fn test_date_sub_with_int8_cast_error() {
       use arrow::array::Date32Array;

       let session_ctx = SessionContext::new();
       let task_ctx = session_ctx.task_ctx();
       let planner = PhysicalPlanner::new(Arc::from(session_ctx), 0);

       // Create a scan operator with Date32 (DATE) and Int8 (TINYINT) columns.
       // This simulates the schema from the Scala test where _20 is DATE and
       // _2 is TINYINT.
       let op_scan = Operator {
           plan_id: 0,
           children: vec![],
           op_struct: Some(OpStruct::Scan(spark_operator::Scan {
               fields: vec![
                   spark_expression::DataType {
                       type_id: 12, // DATE (Date32)
                       type_info: None,
                   },
                   spark_expression::DataType {
                       type_id: 1, // INT8 (TINYINT)
                       type_info: None,
                   },
               ],
               source: "test".to_string(),
               arrow_ffi_safe: false,
           })),
       };

       // Create bound reference for the DATE column (index 0)
       let date_col = spark_expression::Expr {
           expr_struct: Some(Bound(spark_expression::BoundReference {
               index: 0,
               datatype: Some(spark_expression::DataType {
                   type_id: 12, // DATE
                   type_info: None,
               }),
           })),
       };

       // Create bound reference for the INT8 column (index 1)
       let int8_col = spark_expression::Expr {
           expr_struct: Some(Bound(spark_expression::BoundReference {
               index: 1,
               datatype: Some(spark_expression::DataType {
                   type_id: 1, // INT8
                   type_info: None,
               }),
           })),
       };

       // Create a Subtract expression: date_col - int8_col
       // This is equivalent to the SQL: SELECT _20 - _2 FROM tbl (date_sub operation)
       // In the protobuf, subtract uses MathExpr type
       let subtract_expr = spark_expression::Expr {
           expr_struct: Some(ExprStruct::Subtract(Box::new(spark_expression::MathExpr {
               left: Some(Box::new(date_col)),
               right: Some(Box::new(int8_col)),
               return_type: Some(spark_expression::DataType {
                   type_id: 12, // DATE - result should be DATE
                   type_info: None,
               }),
               eval_mode: 0, // Legacy mode
           }))),
       };

       // Create a projection operator with the subtract expression
       let projection = Operator {
           children: vec![op_scan],
           plan_id: 1,
           op_struct: Some(OpStruct::Projection(spark_operator::Projection {
               project_list: vec![subtract_expr],
           })),
       };

       // Create the physical plan
       let (mut scans, datafusion_plan) =
           planner.create_plan(&projection, &mut vec![], 1).unwrap();

       // Execute the plan with test data
       let mut stream = datafusion_plan.native_plan.execute(0, task_ctx).unwrap();

       let runtime = tokio::runtime::Runtime::new().unwrap();
       let (tx, mut rx) = mpsc::channel(1);

       // Send test data: Date32 values and Int8 values
       runtime.spawn(async move {
           // Date32 values are days since the epoch; 19000 falls in early January 2022.
           let date_array = Date32Array::from(vec![Some(19000), Some(19001), Some(19002)]);
           let int8_array = Int8Array::from(vec![Some(1i8), Some(2i8), Some(3i8)]);

           let input_batch1 =
               InputBatch::Batch(vec![Arc::new(date_array), Arc::new(int8_array)], 3);
           let input_batch2 = InputBatch::EOF;

           let batches = vec![input_batch1, input_batch2];

           for batch in batches {
               tx.send(batch).await.unwrap();
           }
       });

       // Execute and expect the cast error
       runtime.block_on(async move {
           loop {
               // The sender is dropped after EOF; if the stream is still pending at
               // that point there is nothing more to feed it, so fail loudly.
               let batch = rx
                   .recv()
                   .await
                   .expect("input channel closed before the stream produced a result");
               scans[0].set_input_batch(batch);
               match poll!(stream.next()) {
                   Poll::Ready(Some(result)) => {
                       // We expect an error here related to date arithmetic with Int8.
                       // The error can be either:
                       // - "Cast error: Casting from Int8 to Date32 not supported"
                       //   (from Spark/Comet cast)
                       // - "Invalid date arithmetic operation: Date32 - Int8" (from Arrow)
                       assert!(
                           result.is_err(),
                           "Expected error for date - int8 operation but got success: {:?}",
                           result
                       );
                       let err = result.unwrap_err();
                       let err_msg = err.to_string();
                       println!("{}", &err_msg);
                       assert!(
                           err_msg.contains("Cast error")
                               || err_msg.contains("Casting from Int8 to Date32")
                               || err_msg.contains("Invalid date arithmetic operation")
                               || err_msg.contains("Date32 - Int8"),
                           "Expected date arithmetic error message but got: {}",
                           err_msg
                       );
                       break;
                   }
                   Poll::Ready(None) => {
                       panic!("Stream ended without producing expected error");
                   }
                   // Not ready yet: feed the next input batch and poll again.
                   Poll::Pending => {}
               }
           }
       });
   }
   ```
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to