This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 0fe67a8e763 [Performance](opt) opt the order by performance in 
permutation (#39092)
0fe67a8e763 is described below

commit 0fe67a8e76397b120427f8105f19da23f387d724
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Sat Aug 10 19:37:30 2024 +0800

    [Performance](opt) opt the order by performance in permutation (#39092)
    
    Issue Number: cherry pick #38985
---
 be/src/vec/columns/column_decimal.h  | 25 +++++++++++++++++--------
 be/src/vec/columns/column_string.cpp |  9 ++++-----
 be/src/vec/columns/column_vector.cpp |  3 ++-
 3 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/be/src/vec/columns/column_decimal.h 
b/be/src/vec/columns/column_decimal.h
index 8d10fb806e4..26ec505e426 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -21,6 +21,7 @@
 #pragma once
 
 #include <glog/logging.h>
+#include <pdqsort.h>
 #include <stdint.h>
 #include <string.h>
 #include <sys/types.h>
@@ -294,14 +295,22 @@ protected:
         for (U i = 0; i < s; ++i) res[i] = i;
 
         auto sort_end = res.end();
-        if (limit && limit < s) sort_end = res.begin() + limit;
-
-        if (reverse)
-            std::partial_sort(res.begin(), sort_end, res.end(),
-                              [this](size_t a, size_t b) { return data[a] > 
data[b]; });
-        else
-            std::partial_sort(res.begin(), sort_end, res.end(),
-                              [this](size_t a, size_t b) { return data[a] < 
data[b]; });
+        if (limit && limit < s / 8.0) {
+            sort_end = res.begin() + limit;
+            if (reverse)
+                std::partial_sort(res.begin(), sort_end, res.end(),
+                                  [this](size_t a, size_t b) { return data[a] 
> data[b]; });
+            else
+                std::partial_sort(res.begin(), sort_end, res.end(),
+                                  [this](size_t a, size_t b) { return data[a] 
< data[b]; });
+        } else {
+            if (reverse)
+                pdqsort(res.begin(), res.end(),
+                        [this](size_t a, size_t b) { return data[a] > data[b]; 
});
+            else
+                pdqsort(res.begin(), res.end(),
+                        [this](size_t a, size_t b) { return data[a] < data[b]; 
});
+        }
     }
 
     void ALWAYS_INLINE decimalv2_do_crc(size_t i, uint64_t& hash) const {
diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 5d2670acb78..e5f900f62a0 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -381,9 +381,8 @@ void ColumnString::get_permutation(bool reverse, size_t 
limit, int /*nan_directi
         res[i] = i;
     }
 
-    if (limit >= s) {
-        limit = 0;
-    }
+    // std::partial_sort need limit << s can get performance benefit
+    if (limit > (s / 8.0)) limit = 0;
 
     if (limit) {
         if (reverse) {
@@ -393,9 +392,9 @@ void ColumnString::get_permutation(bool reverse, size_t 
limit, int /*nan_directi
         }
     } else {
         if (reverse) {
-            std::sort(res.begin(), res.end(), less<false>(*this));
+            pdqsort(res.begin(), res.end(), less<false>(*this));
         } else {
-            std::sort(res.begin(), res.end(), less<true>(*this));
+            pdqsort(res.begin(), res.end(), less<true>(*this));
         }
     }
 }
diff --git a/be/src/vec/columns/column_vector.cpp 
b/be/src/vec/columns/column_vector.cpp
index 1c96f4f2e6c..c12b14dd57e 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -245,7 +245,8 @@ void ColumnVector<T>::get_permutation(bool reverse, size_t 
limit, int nan_direct
 
     if (s == 0) return;
 
-    if (limit >= s) limit = 0;
+    // std::partial_sort need limit << s can get performance benefit
+    if (limit > (s / 8.0)) limit = 0;
 
     if (limit) {
         for (size_t i = 0; i < s; ++i) res[i] = i;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to