This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new a8cc1fd75bc branch-3.1: [fix](array) Fix array distance functions
#54348 (#54372)
a8cc1fd75bc is described below
commit a8cc1fd75bca319c1f6acc232615ea3b801fd27c
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Aug 7 20:21:39 2025 +0800
branch-3.1: [fix](array) Fix array distance functions #54348 (#54372)
Cherry-picked from #54348
Co-authored-by: zhiqiang <[email protected]>
---
.../vec/functions/array/function_array_distance.h | 20 +++++--
.../functions/scalar/CosineDistance.java | 5 +-
.../expressions/functions/scalar/InnerProduct.java | 5 +-
.../expressions/functions/scalar/L1Distance.java | 5 +-
.../expressions/functions/scalar/L2Distance.java | 5 +-
.../test_array_distance_functions.out | Bin 235 -> 654 bytes
.../test_array_distance_functions.groovy | 64 +++++++++++++++++++++
7 files changed, 87 insertions(+), 17 deletions(-)
diff --git a/be/src/vec/functions/array/function_array_distance.h
b/be/src/vec/functions/array/function_array_distance.h
index 4349e93c97c..5a855c04988 100644
--- a/be/src/vec/functions/array/function_array_distance.h
+++ b/be/src/vec/functions/array/function_array_distance.h
@@ -141,25 +141,35 @@ public:
}
dst_null_data[row] = false;
- if (offsets1[row] != offsets2[row]) [[unlikely]] {
+
+ // Calculate actual array sizes for current row.
+ // For nullable arrays, we cannot compare absolute offset values
directly because:
+ // 1. When a row is null, its offset might equal the previous
offset (no elements added)
+ // 2. Or it might include the array size even if the row is null
(implementation dependent)
+ // Therefore, we must calculate the actual array size as:
offsets[row] - offsets[row-1]
+ ssize_t size1 = offsets1[row] - offsets1[row - 1];
+ ssize_t size2 = offsets2[row] - offsets2[row - 1];
+
+ if (size1 != size2) [[unlikely]] {
return Status::InvalidArgument(
"function {} have different input element sizes of
array: {} and {}",
- get_name(), offsets1[row] - offsets1[row - 1],
- offsets2[row] - offsets2[row - 1]);
+ get_name(), size1, size2);
}
typename DistanceImpl::State st;
for (ssize_t pos = offsets1[row - 1]; pos < offsets1[row]; ++pos) {
+ // Calculate corresponding position in the second array
+ ssize_t pos2 = offsets2[row - 1] + (pos - offsets1[row - 1]);
if (arr1.nested_nullmap_data && arr1.nested_nullmap_data[pos])
{
dst_null_data[row] = true;
break;
}
- if (arr2.nested_nullmap_data && arr2.nested_nullmap_data[pos])
{
+ if (arr2.nested_nullmap_data &&
arr2.nested_nullmap_data[pos2]) {
dst_null_data[row] = true;
break;
}
DistanceImpl::accumulate(st, nested_col1->get_element(pos),
- nested_col2->get_element(pos));
+ nested_col2->get_element(pos2));
}
if (!dst_null_data[row]) {
dst_data[row] = DistanceImpl::finalize(st);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
index 9761f949297..14c388bb933 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/CosineDistance.java
@@ -20,9 +20,8 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
* cosine_distance function
*/
public class CosineDistance extends ScalarFunction implements
ExplicitlyCastableSignature,
- ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+ BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(DoubleType.INSTANCE)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
index 5d226951cb1..a56d5d5a522 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/InnerProduct.java
@@ -20,9 +20,8 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
* inner_product function
*/
public class InnerProduct extends ScalarFunction implements
ExplicitlyCastableSignature,
- ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+ BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(DoubleType.INSTANCE)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
index 7423702fc7b..66a6ebd2bf4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L1Distance.java
@@ -20,9 +20,8 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
* l1_distance function
*/
public class L1Distance extends ScalarFunction implements
ExplicitlyCastableSignature,
- ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+ BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(DoubleType.INSTANCE)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
index 14ffee389ae..a9775f59ad7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/L2Distance.java
@@ -20,9 +20,8 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
-import
org.apache.doris.nereids.trees.expressions.functions.ComputePrecisionForArrayItemAgg;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
-import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.DoubleType;
@@ -36,7 +35,7 @@ import java.util.List;
* l2_distance function
*/
public class L2Distance extends ScalarFunction implements
ExplicitlyCastableSignature,
- ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable {
+ BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(DoubleType.INSTANCE)
diff --git
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
index 10c9f2ae306..4a14b8c9a40 100644
Binary files
a/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
and
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_distance_functions.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
index cf196e0d61a..9010750a2ec 100644
---
a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
+++
b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_distance_functions.groovy
@@ -25,6 +25,21 @@ suite("test_array_distance_functions") {
qt_sql "SELECT l2_distance([1, 2, 3], NULL)"
qt_sql "SELECT cosine_distance([1, 2, 3], [0, NULL, 0])"
+ // Test cases for nullable arrays with different null distributions
+ // These test the fix for correct array size comparison when nulls are
present
+ qt_sql "SELECT l1_distance(NULL, NULL)"
+ qt_sql "SELECT l2_distance([1.0, 2.0], [3.0, 4.0])"
+ qt_sql "SELECT cosine_distance([1.0, 2.0, 3.0], [4.0, 5.0, 6.0])"
+ qt_sql "SELECT inner_product([2.0, 3.0], [4.0, 5.0])"
+
+ // Test arrays with NULL elements inside
+ qt_sql "SELECT l1_distance([1.0, NULL, 3.0], [4.0, NULL, 6.0])"
+ qt_sql "SELECT l2_distance([NULL, 2.0], [NULL, 5.0])"
+
+ // Test mixed nullable scenarios - these should work correctly after the
fix
+ qt_sql "SELECT l1_distance([1.0, 2.0], [3.0, 4.0]) as result1,
l1_distance(NULL, [5.0, 6.0]) as result2"
+ qt_sql "SELECT cosine_distance([1.0], [2.0]) as result1,
cosine_distance([3.0], NULL) as result2"
+
// abnormal test cases
try {
sql "SELECT l2_distance([0, 0], [1])"
@@ -37,4 +52,53 @@ suite("test_array_distance_functions") {
} catch (Exception ex) {
assert("${ex}".contains("function cosine_distance have different input
element sizes"))
}
+
+ // Test cases for the nullable array offset fix
+ // These cases specifically test scenarios where absolute offsets might
differ
+ // but actual array sizes are the same (should pass) or different (should
fail)
+ try {
+ sql "SELECT l1_distance([1.0, 2.0, 3.0], [4.0, 5.0])"
+ } catch (Exception ex) {
+ assert("${ex}".contains("function l1_distance have different input
element sizes"))
+ }
+
+ try {
+ sql "SELECT inner_product([1.0], [2.0, 3.0, 4.0])"
+ } catch (Exception ex) {
+ assert("${ex}".contains("function inner_product have different input
element sizes"))
+ }
+
+ // Edge case: empty arrays should work
+ qt_sql "SELECT l1_distance(CAST([] as ARRAY<DOUBLE>), CAST([] as
ARRAY<DOUBLE>))"
+ qt_sql "SELECT l2_distance(CAST([] as ARRAY<DOUBLE>), CAST([] as
ARRAY<DOUBLE>))"
+
+ // Comprehensive test for the offset fix: test with table data containing
mixed nulls
+ // This specifically tests the scenario where offsets might differ due to
null distribution
+ // but actual array sizes are the same
+ sql """
+ DROP TABLE IF EXISTS test_array_distance_nullable
+ """
+ sql """
+ CREATE TABLE test_array_distance_nullable (
+ id INT,
+ arr1 ARRAY<DOUBLE>,
+ arr2 ARRAY<DOUBLE>
+ ) PROPERTIES (
+ "replication_num" = "1"
+ )
+ """
+ sql """
+ INSERT INTO test_array_distance_nullable VALUES
+ (1, [1.0, 2.0], [3.0, 4.0]),
+ (2, NULL, [5.0, 6.0]),
+ (3, [7.0, 8.0], NULL),
+ (4, [9.0, 10.0], [11.0, 12.0]),
+ (5, NULL, NULL)
+ """
+
+ // These queries should work correctly after the fix
+ qt_sql "SELECT id, l1_distance(arr1, arr2) FROM
test_array_distance_nullable ORDER BY id"
+ qt_sql "SELECT id, l2_distance(arr1, arr2) FROM
test_array_distance_nullable ORDER BY id"
+ qt_sql "SELECT id, cosine_distance(arr1, arr2) FROM
test_array_distance_nullable ORDER BY id"
+ qt_sql "SELECT id, inner_product(arr1, arr2) FROM
test_array_distance_nullable ORDER BY id"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]