This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 52cf58b461 Add Utf8->Binary type coercion for comparison (#7080)
52cf58b461 is described below
commit 52cf58b46133d448e067455baab0faf8a50e565a
Author: Jonah Gao <[email protected]>
AuthorDate: Wed Jul 26 01:26:23 2023 +0800
Add Utf8->Binary type coercion for comparison (#7080)
* feat: add Utf8->Binary type coercion for comparison
* Add sqllogictest
* Simplify the code based on code review
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
.../core/tests/sqllogictests/test_files/binary.slt | 23 ++++++
datafusion/expr/src/type_coercion/binary.rs | 84 +++++++++++++++++++++-
2 files changed, 105 insertions(+), 2 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/binary.slt b/datafusion/core/tests/sqllogictests/test_files/binary.slt
index ca55ff56cb..54499e2978 100644
--- a/datafusion/core/tests/sqllogictests/test_files/binary.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/binary.slt
@@ -45,6 +45,23 @@ FF01 ff01 Utf8 Binary
ABC 0abc Utf8 Binary
000 0000 Utf8 Binary
+# comparisons
+query ?BBBB
+SELECT
+ column2,
+ -- binary compare with string
+ column2 = 'ABC',
+ column2 <> 'ABC',
+ -- binary compared with binary
+ column2 = X'ABC',
+ column2 <> X'ABC'
+FROM t;
+----
+ff01 false true false true
+0abc false true true false
+0000 false true false true
+
+
# predicates
query T?
SELECT column1, column2
@@ -127,3 +144,9 @@ SELECT column1, column1 = arrow_cast(X'0102', 'FixedSizeBinary(2)') FROM t
# Comparison to different sized Binary
query error DataFusion error: Error during planning: Cannot infer common argument type for comparison operation FixedSizeBinary\(3\) = Binary
SELECT column1, column1 = X'0102' FROM t
+
+statement ok
+drop table t_source
+
+statement ok
+drop table t
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 56a5664638..9ebea19a16 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -318,6 +318,7 @@ pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
.or_else(|| null_coercion(lhs_type, rhs_type))
.or_else(|| string_numeric_coercion(lhs_type, rhs_type))
.or_else(|| string_temporal_coercion(lhs_type, rhs_type))
+ .or_else(|| binary_coercion(lhs_type, rhs_type))
}
/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a comparison operation
@@ -769,6 +770,18 @@ fn string_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
}
}
+/// Coercion rules for Binaries: the type that both lhs and rhs can be
+/// cast to for the purpose of a computation
+fn binary_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
+ use arrow::datatypes::DataType::*;
+ match (lhs_type, rhs_type) {
+ (Binary | Utf8, Binary) | (Binary, Utf8) => Some(Binary),
+ (LargeBinary | Binary | Utf8 | LargeUtf8, LargeBinary)
+ | (LargeBinary, Binary | Utf8 | LargeUtf8) => Some(LargeBinary),
+ _ => None,
+ }
+}
+
/// coercion rules for like operations.
/// This is a union of string coercion rules and dictionary coercion rules
pub fn like_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
@@ -1039,10 +1052,13 @@ mod tests {
let rhs_type = Dictionary(Box::new(Int8), Box::new(Int16));
assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), Some(Utf8));
- // Can not coerce values of Binary to int, cannot support this
+ // Since we can coerce values of Utf8 to Binary, we can support this
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Dictionary(Box::new(Int8), Box::new(Binary));
- assert_eq!(dictionary_coercion(&lhs_type, &rhs_type, true), None);
+ assert_eq!(
+ dictionary_coercion(&lhs_type, &rhs_type, true),
+ Some(Binary)
+ );
let lhs_type = Dictionary(Box::new(Int8), Box::new(Utf8));
let rhs_type = Utf8;
@@ -1443,6 +1459,70 @@ mod tests {
DataType::Decimal128(15, 3)
);
+ // Binary
+ test_coercion_binary_rule!(
+ DataType::Binary,
+ DataType::Binary,
+ Operator::Eq,
+ DataType::Binary
+ );
+ test_coercion_binary_rule!(
+ DataType::Utf8,
+ DataType::Binary,
+ Operator::Eq,
+ DataType::Binary
+ );
+ test_coercion_binary_rule!(
+ DataType::Binary,
+ DataType::Utf8,
+ Operator::Eq,
+ DataType::Binary
+ );
+
+ // LargeBinary
+ test_coercion_binary_rule!(
+ DataType::LargeBinary,
+ DataType::LargeBinary,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::Binary,
+ DataType::LargeBinary,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::LargeBinary,
+ DataType::Binary,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::Utf8,
+ DataType::LargeBinary,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::LargeBinary,
+ DataType::Utf8,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::LargeUtf8,
+ DataType::LargeBinary,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+ test_coercion_binary_rule!(
+ DataType::LargeBinary,
+ DataType::LargeUtf8,
+ Operator::Eq,
+ DataType::LargeBinary
+ );
+
// TODO add other data type
Ok(())
}