This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 0fe67a8e763 [Performance](opt) opt the order by performance in permutation (#39092) 0fe67a8e763 is described below commit 0fe67a8e76397b120427f8105f19da23f387d724 Author: HappenLee <happen...@hotmail.com> AuthorDate: Sat Aug 10 19:37:30 2024 +0800 [Performance](opt) opt the order by performance in permutation (#39092) Issue Number: cherry pick #38985 --- be/src/vec/columns/column_decimal.h | 25 +++++++++++++++++-------- be/src/vec/columns/column_string.cpp | 9 ++++----- be/src/vec/columns/column_vector.cpp | 3 ++- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 8d10fb806e4..26ec505e426 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -21,6 +21,7 @@ #pragma once #include <glog/logging.h> +#include <pdqsort.h> #include <stdint.h> #include <string.h> #include <sys/types.h> @@ -294,14 +295,22 @@ protected: for (U i = 0; i < s; ++i) res[i] = i; auto sort_end = res.end(); - if (limit && limit < s) sort_end = res.begin() + limit; - - if (reverse) - std::partial_sort(res.begin(), sort_end, res.end(), - [this](size_t a, size_t b) { return data[a] > data[b]; }); - else - std::partial_sort(res.begin(), sort_end, res.end(), - [this](size_t a, size_t b) { return data[a] < data[b]; }); + if (limit && limit < s / 8.0) { + sort_end = res.begin() + limit; + if (reverse) + std::partial_sort(res.begin(), sort_end, res.end(), + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + std::partial_sort(res.begin(), sort_end, res.end(), + [this](size_t a, size_t b) { return data[a] < data[b]; }); + } else { + if (reverse) + pdqsort(res.begin(), res.end(), + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + pdqsort(res.begin(), res.end(), + [this](size_t a, size_t b) { return data[a] < data[b]; }); + } } void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint64_t& hash) const { diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 5d2670acb78..e5f900f62a0 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -381,9 +381,8 @@ void ColumnString::get_permutation(bool reverse, size_t limit, int /*nan_directi res[i] = i; } - if (limit >= s) { - limit = 0; - } + // std::partial_sort need limit << s can get performance benefit + if (limit > (s / 8.0)) limit = 0; if (limit) { if (reverse) { @@ -393,9 +392,9 @@ void ColumnString::get_permutation(bool reverse, size_t limit, int /*nan_directi } } else { if (reverse) { - std::sort(res.begin(), res.end(), less<false>(*this)); + pdqsort(res.begin(), res.end(), less<false>(*this)); } else { - std::sort(res.begin(), res.end(), less<true>(*this)); + pdqsort(res.begin(), res.end(), less<true>(*this)); } } } diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index 1c96f4f2e6c..c12b14dd57e 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -245,7 +245,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t limit, int nan_direct if (s == 0) return; - if (limit >= s) limit = 0; + // std::partial_sort need limit << s can get performance benefit + if (limit > (s / 8.0)) limit = 0; if (limit) { for (size_t i = 0; i < s; ++i) res[i] = i; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org