alamb commented on a change in pull request #9565:
URL: https://github.com/apache/arrow/pull/9565#discussion_r583024015



##########
File path: rust/datafusion/src/physical_plan/type_coercion.rs
##########
@@ -168,20 +168,35 @@ fn maybe_data_types(
 pub fn can_coerce_from(type_into: &DataType, type_from: &DataType) -> bool {
     use self::DataType::*;
     match type_into {
-        Int8 => matches!(type_from, Int8),
-        Int16 => matches!(type_from, Int8 | Int16 | UInt8),
-        Int32 => matches!(type_from, Int8 | Int16 | Int32 | UInt8 | UInt16),
+        Int8 => matches!(type_from, Int8 | Utf8 | LargeUtf8),

Review comment:
       This code seems inconsistent with the comments of `can_coerce_from`: 
   ```
   /// Return true if a value of type `type_from` can be coerced
   /// (losslessly converted) into a value of `type_to`
   ```
   
   What would happen if we tried to coerce `"foo"` into an `Int8`? I think the 
correct answer is that that we should get a runtime error. I worry that we 
would end up silently converting to `Null`. 
   
   Can you explain why these coercion rules are needed?  And if we decide they 
are, can we please update the comments of this function to reflect the newfound 
understanding?
   
   @jorgecarleitao  I think did some previous work in this area, so he may have 
some relevant perspective. cc @andygrove  in case you are interested

##########
File path: rust/datafusion/tests/sql.rs
##########
@@ -530,17 +530,6 @@ async fn sqrt_f32_vs_f64() -> Result<()> {
     Ok(())
 }
 
-#[tokio::test]
-async fn csv_query_error() -> Result<()> {
-    // sin(utf8) should error
-    let mut ctx = create_ctx()?;
-    register_aggregate_csv(&mut ctx)?;
-    let sql = "SELECT sin(c1) FROM aggregate_test_100";
-    let plan = ctx.create_logical_plan(&sql);
-    assert!(plan.is_err());

Review comment:
       I think we should leave this test in (even if we decide that `sin(utf8)` 
should not error) and update it to show the expected results as a way of 
documenting the expected behavior. 

##########
File path: rust/datafusion/src/physical_plan/functions.rs
##########
@@ -1137,295 +1259,831 @@ mod tests {
             Float64Array
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some("trim")),
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int8(Some(2))),
+            ],
+            Ok(Some("ab")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some("trim ")),
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int64(Some(200))),
+            ],
+            Ok(Some("abcde")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim ")),
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int64(Some(-2))),
+            ],
+            Ok(Some("abc")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("trim".to_string())))],
-            Ok(Some("trim")),
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int64(Some(-200))),
+            ],
+            Ok(Some("")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))],
-            Ok(Some("\n trim ")),
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int64(Some(0))),
+            ],
+            Ok(Some("")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Ltrim,
-            &[lit(ScalarValue::Utf8(None))],
+            Left,
+            &[
+                lit(ScalarValue::Utf8(None)),
+                lit(ScalarValue::Int64(Some(2))),
+            ],
             Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("chars".to_string())))],
-            Ok(Some(5)),
-            i32,
-            Int32,
-            Int32Array
-        );
-        test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("josé".to_string())))],
-            Ok(Some(5)),
-            i32,
-            Int32,
-            Int32Array
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("abcde".to_string()))),
+                lit(ScalarValue::Int64(None)),
+            ],
+            Ok(None),
+            &str,
+            Utf8,
+            StringArray
         );
         test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(Some("".to_string())))],
-            Ok(Some(0)),
-            i32,
-            Int32,
-            Int32Array
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
+            ],
+            Ok(Some("joséé")),
+            &str,
+            Utf8,
+            StringArray
         );
         test_function!(
-            OctetLength,
-            &[lit(ScalarValue::Utf8(None))],
-            Ok(None),
-            i32,
-            Int32,
-            Int32Array
+            Left,
+            &[
+                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
+                lit(ScalarValue::Int64(Some(-3))),
+            ],
+            Ok(Some("joséé")),
+            &str,
+            Utf8,
+            StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(0))),
+                lit(ScalarValue::Utf8(Some("josé".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
             ],
-            Ok(Some("alphabet")),
+            Ok(Some(" josé")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
                 lit(ScalarValue::Int64(Some(5))),
             ],
-            Ok(Some("ésoj")),
+            Ok(Some("   hi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(1))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(0))),
             ],
-            Ok(Some("alphabet")),
+            Ok(Some("")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(2))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(None)),
             ],
-            Ok(Some("lphabet")),
+            Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
+                lit(ScalarValue::Utf8(None)),
+                lit(ScalarValue::Int64(Some(5))),
             ],
-            Ok(Some("phabet")),
+            Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(-3))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
+                lit(ScalarValue::Utf8(Some("xy".to_string()))),
             ],
-            Ok(Some("alphabet")),
+            Ok(Some("xyxhi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(30))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(21))),
+                lit(ScalarValue::Utf8(Some("abcdef".to_string()))),
             ],
-            Ok(Some("")),
+            Ok(Some("abcdefabcdefabcdefahi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(None)),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
+                lit(ScalarValue::Utf8(Some(" ".to_string()))),
             ],
-            Ok(None),
+            Ok(Some("   hi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(Some(2))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
+                lit(ScalarValue::Utf8(Some("".to_string()))),
             ],
-            Ok(Some("ph")),
+            Ok(Some("hi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(Some(20))),
+                lit(ScalarValue::Utf8(None)),
+                lit(ScalarValue::Int64(Some(5))),
+                lit(ScalarValue::Utf8(Some("xy".to_string()))),
             ],
-            Ok(Some("phabet")),
+            Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
                 lit(ScalarValue::Int64(None)),
-                lit(ScalarValue::Int64(Some(20))),
+                lit(ScalarValue::Utf8(Some("xy".to_string()))),
             ],
             Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(3))),
-                lit(ScalarValue::Int64(None)),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Int64(Some(5))),
+                lit(ScalarValue::Utf8(None)),
             ],
             Ok(None),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("alphabet".to_string()))),
-                lit(ScalarValue::Int64(Some(1))),
-                lit(ScalarValue::Int64(Some(-1))),
+                lit(ScalarValue::Utf8(Some("hi".to_string()))),
+                lit(ScalarValue::Utf8(Some("5".to_string()))),
+                lit(ScalarValue::Utf8(Some("xy".to_string()))),
             ],
-            Err(DataFusionError::Execution(
-                "negative substring length not allowed".to_string(),
-            )),
+            Ok(Some("xyxhi")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Substr,
+            Lpad,
             &[
-                lit(ScalarValue::Utf8(Some("joséésoj".to_string()))),
-                lit(ScalarValue::Int64(Some(5))),
-                lit(ScalarValue::Int64(Some(2))),
+                lit(ScalarValue::Utf8(Some("josé".to_string()))),
+                lit(ScalarValue::Int64(Some(10))),
+                lit(ScalarValue::Utf8(Some("xy".to_string()))),
             ],
-            Ok(Some("és")),
+            Ok(Some("xyxyxyjosé")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
-            Ok(Some("trim")),
+            Lpad,
+            &[
+                lit(ScalarValue::Utf8(Some("josé".to_string()))),
+                lit(ScalarValue::Int64(Some(10))),
+                lit(ScalarValue::Utf8(Some("éñ".to_string()))),
+            ],
+            Ok(Some("éñéñéñjosé")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
-            Ok(Some(" trim")),
+            Ltrim,
+            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
+            Ok(Some("trim")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim \n".to_string())))],
-            Ok(Some(" trim \n")),
+            Ltrim,
+            &[lit(ScalarValue::Utf8(Some(" trim ".to_string())))],
+            Ok(Some("trim ")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
-            &[lit(ScalarValue::Utf8(Some(" trim".to_string())))],
-            Ok(Some(" trim")),
+            Ltrim,
+            &[lit(ScalarValue::Utf8(Some("trim ".to_string())))],
+            Ok(Some("trim ")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
+            Ltrim,
             &[lit(ScalarValue::Utf8(Some("trim".to_string())))],
             Ok(Some("trim")),
             &str,
             Utf8,
             StringArray
         );
         test_function!(
-            Rtrim,
+            Ltrim,
+            &[lit(ScalarValue::Utf8(Some("\n trim ".to_string())))],
+            Ok(Some("\n trim ")),
+            &str,
+            Utf8,
+            StringArray
+        );
+        test_function!(

Review comment:
       I love these tests

##########
File path: rust/datafusion/tests/sql.rs
##########
@@ -2079,11 +2104,11 @@ async fn test_string_expressions() -> Result<()> {
     test_expression!("substr('alphabet', 2)", "lphabet");
     test_expression!("substr('alphabet', 3)", "phabet");
     test_expression!("substr('alphabet', 30)", "");
-    test_expression!("substr('alphabet', CAST(NULL AS int))", "NULL");

Review comment:
       👍 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to