This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 52cf58b461 Add Utf8->Binary type coercion for comparison (#7080)
52cf58b461 is described below

commit 52cf58b46133d448e067455baab0faf8a50e565a
Author: Jonah Gao <[email protected]>
AuthorDate: Wed Jul 26 01:26:23 2023 +0800

    Add Utf8->Binary type coercion for comparison (#7080)
    
    * feat: add Utf8->Binary type coercion for comparison
    
    * Add sqllogictest
    
    * Simplify the code based on code review
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 .../core/tests/sqllogictests/test_files/binary.slt | 23 ++++++
 datafusion/expr/src/type_coercion/binary.rs        | 84 +++++++++++++++++++++-
 2 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/binary.slt b/datafusion/core/tests/sqllogictests/test_files/binary.slt
index ca55ff56cb..54499e2978 100644
--- a/datafusion/core/tests/sqllogictests/test_files/binary.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/binary.slt
@@ -45,6 +45,23 @@ FF01 ff01 Utf8 Binary
 ABC 0abc Utf8 Binary
 000 0000 Utf8 Binary
 
+# comparisons
+query ?BBBB
+SELECT
+  column2,
+  -- binary compare with string
+  column2 = 'ABC',
+  column2 <> 'ABC',
+  -- binary compared with binary
+  column2 = X'ABC',
+  column2 <> X'ABC'
+FROM t;
+----
+ff01 false true false true
+0abc false true true false
+0000 false true false true
+
+
 # predicates
 query T?
 SELECT column1, column2
@@ -127,3 +144,9 @@ SELECT column1, column1 = arrow_cast(X'0102', 'FixedSizeBinary(2)') FROM t
 # Comparison to different sized Binary
 query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation FixedSizeBinary\(3\) = Binary
 SELECT column1, column1 = X'0102' FROM t
+
+statement ok
+drop table t_source
+
+statement ok
+drop table t
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 56a5664638..9ebea19a16 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -318,6 +318,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
         .or_else(|| null_coercion(lhs_type, rhs_type))
         .or_else(|| string_numeric_coercion(lhs_type, rhs_type))
         .or_else(|| string_temporal_coercion(lhs_type, rhs_type))
+        .or_else(|| binary_coercion(lhs_type, rhs_type))
 }
 
 /// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
@@ -769,6 +770,18 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
     }
 }
 
+/// Coercion rules for binary types: the type that both lhs and rhs can be
+/// cast to for the purpose of a computation
+fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+    match (lhs_type, rhs_type) {
+        (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary),
+        (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary)
+        | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary),
+        _ => None,
+    }
+}
+
 /// coercion rules for like operations.
 /// This is a union of string coercion rules and dictionary coercion rules
 pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
@@ -1039,10 +1052,13 @@ mod tests {
         let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
         assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8));
 
-        // Can not coerce values of Binary to int,  cannot support this
+        // Since we can coerce values of Utf8 to Binary, we can support this
         let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
         let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary));
-        assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), None);
+        assert_eq!(
+            dictionary_coercion(&lhs_type, &rhs_type, true),
+            Some(Binary)
+        );
 
         let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
         let rhs_type = Utf8;
@@ -1443,6 +1459,70 @@ mod tests {
             DataType::Decimal128(15, 3)
         );
 
+        // Binary
+        test_coercion_binary_rule!(
+            DataType::Binary,
+            DataType::Binary,
+            Operator::Eq,
+            DataType::Binary
+        );
+        test_coercion_binary_rule!(
+            DataType::Utf8,
+            DataType::Binary,
+            Operator::Eq,
+            DataType::Binary
+        );
+        test_coercion_binary_rule!(
+            DataType::Binary,
+            DataType::Utf8,
+            Operator::Eq,
+            DataType::Binary
+        );
+
+        // LargeBinary
+        test_coercion_binary_rule!(
+            DataType::LargeBinary,
+            DataType::LargeBinary,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::Binary,
+            DataType::LargeBinary,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::LargeBinary,
+            DataType::Binary,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::Utf8,
+            DataType::LargeBinary,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::LargeBinary,
+            DataType::Utf8,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::LargeUtf8,
+            DataType::LargeBinary,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+        test_coercion_binary_rule!(
+            DataType::LargeBinary,
+            DataType::LargeUtf8,
+            Operator::Eq,
+            DataType::LargeBinary
+        );
+
         // TODO add other data type
         Ok(())
     }

Reply via email to