This is an automated email from the ASF dual-hosted git repository.
HappenLee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 7c409402b7b [fix](be) Skip NaN values in PERCENTILE/PERCENTILE_ARRAY
aggregations (#63472)
7c409402b7b is described below
commit 7c409402b7b1b6f6f616e9c1b9a85e1308239b51
Author: TengJianPing <[email protected]>
AuthorDate: Mon Jun 29 11:23:16 2026 +0800
[fix](be) Skip NaN values in PERCENTILE/PERCENTILE_ARRAY aggregations
(#63472)
Problem Summary: For floating-point inputs, PERCENTILE(v, q) (and
PERCENTILE_ARRAY/PERCENTILE_CONT, all routed to
AggregateFunctionPercentileV2 / PercentileExactState) propagated NaN
through std::nth_element / std::min_element. Every ordered comparison
involving NaN is false, so NaN breaks the strict weak ordering these
algorithms require and can end up as the selected element. With data like
(1.0, NaN, 2.0), PERCENTILE(v, 0.5) returned NaN, while PERCENTILE_APPROX
returned 1.5 because TDigest internally discards NaN. This made the exact
and approximate variants inconsistent and hid valid percentile results.
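Fix: in PercentileExactState::_append, when ValueType is a
floating-point type, skip NaN values before they are added to the values
array. Integer paths are unchanged. After the fix, PERCENTILE on (1.0,
NaN, 2.0) returns 1.5, matching PERCENTILE_APPROX. If every input value is
NaN, nothing is retained: PERCENTILE returns NaN and PERCENTILE_ARRAY
returns NaN for each requested quantile.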
---
.../aggregate/aggregate_function_percentile.h | 53 +++++++++-------------
.../support_type/percentile/percentile.out | 42 +++++++++++++++++
.../percentile_ap/percentile_approx.out | 21 +++++++++
.../percentile_ap_w/percentile_approx_weighted.out | 21 +++++++++
.../percentile_ar/percentile_array.out | 21 +++++++++
.../support_type/percentile/percentile.groovy | 48 ++++++++++++++++++++
.../percentile_ap/percentile_approx.groovy | 9 ++++
.../percentile_approx_weighted.groovy | 8 ++++
.../percentile_ar/percentile_array.groovy | 7 +++
9 files changed, 199 insertions(+), 31 deletions(-)
diff --git a/be/src/exprs/aggregate/aggregate_function_percentile.h
b/be/src/exprs/aggregate/aggregate_function_percentile.h
index 439016aa54d..67a6e1f9463 100644
--- a/be/src/exprs/aggregate/aggregate_function_percentile.h
+++ b/be/src/exprs/aggregate/aggregate_function_percentile.h
@@ -195,13 +195,7 @@ public:
void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
auto& col = assert_cast<ColumnFloat64&>(to);
- double result = this->data(place).get();
-
- if (std::isnan(result)) {
- col.insert_default();
- } else {
- col.get_data().push_back(result);
- }
+ col.get_data().push_back(this->data(place).get());
}
};
@@ -232,13 +226,7 @@ public:
void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
auto& col = assert_cast<ColumnFloat64&>(to);
- double result = this->data(place).get();
-
- if (std::isnan(result)) {
- col.insert_default();
- } else {
- col.get_data().push_back(result);
- }
+ col.get_data().push_back(this->data(place).get());
}
};
@@ -270,13 +258,7 @@ public:
void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
auto& col = assert_cast<ColumnFloat64&>(to);
- double result = this->data(place).get();
-
- if (std::isnan(result)) {
- col.insert_default();
- } else {
- col.get_data().push_back(result);
- }
+ col.get_data().push_back(this->data(place).get());
}
};
@@ -310,13 +292,7 @@ public:
void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn&
to) const override {
auto& col = assert_cast<ColumnFloat64&>(to);
- double result = this->data(place).get();
-
- if (std::isnan(result)) {
- col.insert_default();
- } else {
- col.get_data().push_back(result);
- }
+ col.get_data().push_back(this->data(place).get());
}
};
@@ -509,7 +485,7 @@ struct PercentileExactState {
double get() const {
if (!inited_flag || levels.empty() || values.empty()) {
- return 0.0;
+ return std::numeric_limits<double>::quiet_NaN();
}
DCHECK_EQ(levels.quantiles.size(), 1);
@@ -518,7 +494,7 @@ struct PercentileExactState {
void insert_result_into(IColumn& to) const {
auto& column_data = assert_cast<ColumnFloat64&>(to).get_data();
- if (!inited_flag || levels.empty() || values.empty()) {
+ if (!inited_flag || levels.empty()) {
return;
}
@@ -527,6 +503,13 @@ struct PercentileExactState {
column_data.resize(old_size + size);
auto* result = column_data.data() + old_size;
+ if (values.empty()) {
+ for (size_t i = 0; i < size; ++i) {
+ result[i] = std::numeric_limits<double>::quiet_NaN();
+ }
+ return;
+ }
+
if (values.size() == 1) {
for (size_t i = 0; i < size; ++i) {
result[i] = static_cast<double>(values.front());
@@ -588,7 +571,15 @@ private:
return;
}
values.reserve(values.size() + count);
- values.insert_assume_reserved(data, data + count);
+ if constexpr (std::is_floating_point_v<ValueType>) {
+ for (size_t i = 0; i < count; ++i) {
+ if (!std::isnan(data[i])) {
+ values.push_back(data[i]);
+ }
+ }
+ } else {
+ values.insert_assume_reserved(data, data + count);
+ }
}
double _get_result(double quantile) const {
diff --git
a/regression-test/data/query_p0/aggregate/support_type/percentile/percentile.out
b/regression-test/data/query_p0/aggregate/support_type/percentile/percentile.out
index 4c8010a3e97..4c5e3c5e57d 100644
---
a/regression-test/data/query_p0/aggregate/support_type/percentile/percentile.out
+++
b/regression-test/data/query_p0/aggregate/support_type/percentile/percentile.out
@@ -1,4 +1,25 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !percentile_tinyint_empty --
+\N
+
+-- !percentile_smallint_empty --
+\N
+
+-- !percentile_int_empty --
+\N
+
+-- !percentile_bigint_empty --
+\N
+
+-- !percentile_largeint_empty --
+\N
+
+-- !percentile_float_empty --
+\N
+
+-- !percentile_double_empty --
+\N
+
-- !percentile_tinyint --
101
@@ -20,3 +41,24 @@
-- !percentile_double --
2.718281828
+-- !percentile_double_nan --
+1.5
+
+-- !percentile_float_nan --
+1.5
+
+-- !percentile_array_double_nan --
+[1.25, 1.5, 1.75]
+
+-- !percentile_approx_double_nan --
+1.5
+
+-- !percentile_all_nan --
+NaN
+
+-- !percentile_array_all_nan --
+[NaN, NaN, NaN]
+
+-- !percentile_approx_all_nan --
+NaN
+
diff --git
a/regression-test/data/query_p0/aggregate/support_type/percentile_ap/percentile_approx.out
b/regression-test/data/query_p0/aggregate/support_type/percentile_ap/percentile_approx.out
index 4dc5f430c7b..10c81f9f095 100644
---
a/regression-test/data/query_p0/aggregate/support_type/percentile_ap/percentile_approx.out
+++
b/regression-test/data/query_p0/aggregate/support_type/percentile_ap/percentile_approx.out
@@ -1,4 +1,25 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !percentile_approx_tinyint_empty --
+\N
+
+-- !percentile_approx_smallint_empty --
+\N
+
+-- !percentile_approx_int_empty --
+\N
+
+-- !percentile_approx_bigint_empty --
+\N
+
+-- !percentile_approx_largeint_empty --
+\N
+
+-- !percentile_approx_float_empty --
+\N
+
+-- !percentile_approx_double_empty --
+\N
+
-- !percentile_approx_tinyint --
100
diff --git
a/regression-test/data/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.out
b/regression-test/data/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.out
index 6999d55f3df..62dbfd409c2 100644
---
a/regression-test/data/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.out
+++
b/regression-test/data/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.out
@@ -1,4 +1,25 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !percentile_approx_weighted_tinyint_empty --
+\N
+
+-- !percentile_approx_weighted_smallint_empty --
+\N
+
+-- !percentile_approx_weighted_int_empty --
+\N
+
+-- !percentile_approx_weighted_bigint_empty --
+\N
+
+-- !percentile_approx_weighted_largeint_empty --
+\N
+
+-- !percentile_approx_weighted_float_empty --
+\N
+
+-- !percentile_approx_weighted_double_empty --
+\N
+
-- !percentile_approx_weighted_double --
2.718281745910645
diff --git
a/regression-test/data/query_p0/aggregate/support_type/percentile_ar/percentile_array.out
b/regression-test/data/query_p0/aggregate/support_type/percentile_ar/percentile_array.out
index 7ba2e93f54e..39ee4a649ce 100644
---
a/regression-test/data/query_p0/aggregate/support_type/percentile_ar/percentile_array.out
+++
b/regression-test/data/query_p0/aggregate/support_type/percentile_ar/percentile_array.out
@@ -1,4 +1,25 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !percentile_array_tinyint_empty --
+[]
+
+-- !percentile_array_smallint_empty --
+[]
+
+-- !percentile_array_int_empty --
+[]
+
+-- !percentile_array_bigint_empty --
+[]
+
+-- !percentile_array_largeint_empty --
+[]
+
+-- !percentile_array_float_empty --
+[]
+
+-- !percentile_array_double_empty --
+[]
+
-- !percentile_array_tinyint --
[101, 101.6]
diff --git
a/regression-test/suites/query_p0/aggregate/support_type/percentile/percentile.groovy
b/regression-test/suites/query_p0/aggregate/support_type/percentile/percentile.groovy
index 7df881aface..6ea4fb09ed8 100644
---
a/regression-test/suites/query_p0/aggregate/support_type/percentile/percentile.groovy
+++
b/regression-test/suites/query_p0/aggregate/support_type/percentile/percentile.groovy
@@ -38,6 +38,13 @@ suite("percentile") {
distributed BY hash(k1) buckets 3
properties("replication_num" = "1");
"""
+ qt_percentile_tinyint_empty """select percentile(col_tinyint, 0.5) from
d_table;"""
+ qt_percentile_smallint_empty """select percentile(col_smallint, 0.5) from
d_table;"""
+ qt_percentile_int_empty """select percentile(col_int, 0.5) from d_table;"""
+ qt_percentile_bigint_empty """select percentile(col_bigint, 0.5) from
d_table;"""
+ qt_percentile_largeint_empty """select percentile(col_largeint, 0.5) from
d_table;"""
+ qt_percentile_float_empty """select percentile(col_float, 0.5) from
d_table;"""
+ qt_percentile_double_empty """select percentile(col_double, 0.5) from
d_table;"""
// 插入测试数据
sql """
@@ -55,4 +62,45 @@ suite("percentile") {
qt_percentile_largeint """select percentile(col_largeint, 0.5) from
d_table;"""
qt_percentile_float """select percentile(col_float, 0.5) from d_table;"""
qt_percentile_double """select percentile(col_double, 0.5) from d_table;"""
+
+ sql """drop table if exists percentile_nan_t;"""
+ sql """
+ create table percentile_nan_t (
+ id int,
+ v_double double,
+ v_float float
+ ) duplicate key(id)
+ distributed by hash(id) buckets 1
+ properties("replication_num" = "1");
+ """
+ sql """
+ insert into percentile_nan_t values
+ (1, 1.0, cast(1.0 as float)),
+ (2, cast('nan' as double), cast('nan' as float)),
+ (3, 2.0, cast(2.0 as float));
+ """
+
+ qt_percentile_double_nan """select percentile(v_double, 0.5) from
percentile_nan_t;"""
+ qt_percentile_float_nan """select percentile(v_float, 0.5) from
percentile_nan_t;"""
+ qt_percentile_array_double_nan """select percentile_array(v_double, [0.25,
0.5, 0.75]) from percentile_nan_t;"""
+ qt_percentile_approx_double_nan """select percentile_approx(v_double, 0.5)
from percentile_nan_t;"""
+
+ sql """drop table if exists percentile_all_nan_t;"""
+ sql """
+ create table percentile_all_nan_t (
+ id int,
+ v_double double
+ ) duplicate key(id)
+ distributed by hash(id) buckets 1
+ properties("replication_num" = "1");
+ """
+ sql """
+ insert into percentile_all_nan_t values
+ (1, cast('nan' as double)),
+ (2, cast('nan' as double));
+ """
+
+ qt_percentile_all_nan """select percentile(v_double, 0.5) from
percentile_all_nan_t;"""
+ qt_percentile_array_all_nan """select percentile_array(v_double, [0.25,
0.5, 0.75]) from percentile_all_nan_t;"""
+ qt_percentile_approx_all_nan """select percentile_approx(v_double, 0.5)
from percentile_all_nan_t;"""
}
\ No newline at end of file
diff --git
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ap/percentile_approx.groovy
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ap/percentile_approx.groovy
index 576d22e5739..d3e6ddc28ba 100644
---
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ap/percentile_approx.groovy
+++
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ap/percentile_approx.groovy
@@ -49,6 +49,15 @@ suite("percentile_approx") {
properties("replication_num" = "1");
"""
+ qt_percentile_approx_tinyint_empty """select
percentile_approx(col_tinyint, 0.5) from d_table;"""
+ qt_percentile_approx_smallint_empty """select
percentile_approx(col_smallint, 0.5) from d_table;"""
+ qt_percentile_approx_int_empty """select percentile_approx(col_int, 0.5)
from d_table;"""
+ qt_percentile_approx_bigint_empty """select percentile_approx(col_bigint,
0.5) from d_table;"""
+ qt_percentile_approx_largeint_empty """select
percentile_approx(col_largeint, 0.5) from d_table;"""
+ qt_percentile_approx_float_empty """select percentile_approx(col_float,
0.5) from d_table;"""
+ qt_percentile_approx_double_empty """select percentile_approx(col_double,
0.5) from d_table;"""
+
+
// 插入测试数据
sql """
insert into d_table values
diff --git
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.groovy
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.groovy
index 6d3879ed612..9519304f482 100644
---
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.groovy
+++
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ap_w/percentile_approx_weighted.groovy
@@ -50,6 +50,14 @@ suite("percentile_approx_weighted") {
properties("replication_num" = "1");
"""
+ qt_percentile_approx_weighted_tinyint_empty """select
percentile_approx_weighted(col_tinyint, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_smallint_empty """select
percentile_approx_weighted(col_smallint, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_int_empty """select
percentile_approx_weighted(col_int, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_bigint_empty """select
percentile_approx_weighted(col_bigint, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_largeint_empty """select
percentile_approx_weighted(col_largeint, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_float_empty """select
percentile_approx_weighted(col_float, col_float, 0.5) from d_table;"""
+ qt_percentile_approx_weighted_double_empty """select
percentile_approx_weighted(col_double, col_float, 0.5) from d_table;"""
+
// 插入测试数据
sql """
insert into d_table values
diff --git
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ar/percentile_array.groovy
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ar/percentile_array.groovy
index 986b4888c4e..49ec041f88d 100644
---
a/regression-test/suites/query_p0/aggregate/support_type/percentile_ar/percentile_array.groovy
+++
b/regression-test/suites/query_p0/aggregate/support_type/percentile_ar/percentile_array.groovy
@@ -37,6 +37,13 @@ suite("percentile_array") {
distributed BY hash(k1) buckets 3
properties("replication_num" = "1");
"""
+ qt_percentile_array_tinyint_empty """select percentile_array(col_tinyint,
array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_smallint_empty """select
percentile_array(col_smallint, array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_int_empty """select percentile_array(col_int,
array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_bigint_empty """select percentile_array(col_bigint,
array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_largeint_empty """select
percentile_array(col_largeint, array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_float_empty """select percentile_array(col_float,
array(0.5, 0.8)) from d_table;"""
+ qt_percentile_array_double_empty """select percentile_array(col_double,
array(0.5, 0.8)) from d_table;"""
// 插入测试数据
sql """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]