On Thu, Aug 25, 2022 at 5:55 PM Benjamin Coutu <ben.co...@zeyos.com> wrote: > > Hello, > > Inspired by the recent discussions[1][2] around sort improvements, I took a > look around the code and noticed the use of a somewhat naive version of > insertion sort within the broader quicksort code. > > The current implementation (see sort_template.h) is practically the textbook > version of insertion sort:
Right. I think it's worth looking into. When I tested dual-pivot quicksort, a threshold of 18 seemed best for my inputs, so making insertion sort more efficient could tilt the balance more in favor of dual-pivot. (It already seems slightly ahead, but as I mentioned in the thread you linked, removing branches for null handling would make it more compelling). > DO_ASSIGN and DO_COPY macros would have to be declared analogue to DO_SWAP > via the template. I don't think you need these macros, since this optimization is only convenient if you know the type at compile time. See the attached, which I had laying around when I was looking at PDQuicksort. I believe it's similar to what you have in mind. (Ignore the part about "unguarded", it's irrelevant out of context). Notice that it avoids unnecessary copies, but has two calls to DO_COMPARE, which is *not* small for tuples. > Anyways, there might be some low hanging fruit here. If it turns out to be > significantly faster one might even consider increasing the insertion sort > threshold from < 7 to < 10 sort elements. But that is a whole other > discussion for another day. I think it belongs around 10 now anyway. I also don't think you'll see much benefit with your proposal at a threshold of 7 -- I suspect it'd be more enlightening to test a range of thresholds with and without the patch, to see how the inflection point shifts. That worked pretty well when testing dual-pivot. -- John Naylor EDB: http://www.enterprisedb.com
From fd4bfdbee88478fac32838712c112d91f73c5db5 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 4 May 2022 23:35:06 +0700 Subject: [PATCH v3 8/8] Use unguarded insertion sort where possible Since we recurse to the partitions in order, guarding is only needed at the very beginning of the input. It's not yet clear if this is a performance win for sort keys with complex comparators. --- src/include/lib/sort_template.h | 58 +++++++++++++++++++++++++- src/test/regress/expected/geometry.out | 2 +- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/src/include/lib/sort_template.h b/src/include/lib/sort_template.h index 3b3c4bc128..ae76fd39e8 100644 --- a/src/include/lib/sort_template.h +++ b/src/include/lib/sort_template.h @@ -79,7 +79,6 @@ * https://github.com/orlp/pdqsort * * WIP: Not yet implemented: - * - use unguarded insertion sort where possible * - fall back to heap sort to guard the stack * * Other techniques used in PDQuicksort, but likely not worth adopting: @@ -174,6 +173,7 @@ ST_SCOPE void ST_SORT(ST_ELEMENT_TYPE * first, size_t n /* sort private helper functions */ #define ST_INSERTION_SORT ST_MAKE_NAME(ST_SORT, insertion_sort) #define ST_INSERTION_SORT_PARTIAL ST_MAKE_NAME(ST_SORT, insertion_sort_partial) +#define ST_INSERTION_SORT_UNGUARDED ST_MAKE_NAME(ST_SORT, insertion_sort_unguarded) #define ST_PARTITION_LEFT ST_MAKE_NAME(ST_SORT, partition_left) #define ST_PARTITION_RIGHT ST_MAKE_NAME(ST_SORT, partition_right) #define ST_SORT_INTERNAL ST_MAKE_NAME(ST_SORT, internal) @@ -213,6 +213,12 @@ ST_SCOPE void ST_SORT(ST_ELEMENT_TYPE * first, size_t n ST_SORT_INVOKE_COMPARE \ ST_SORT_INVOKE_ARG) +#define DO_INSERTION_SORT_UNGUARDED(begin_, end_) \ + ST_INSERTION_SORT_UNGUARDED((begin_), (end_) \ + ST_SORT_INVOKE_ELEMENT_SIZE \ + ST_SORT_INVOKE_COMPARE \ + ST_SORT_INVOKE_ARG) + #define DO_PARTITION_LEFT(begin_, end_) \ ST_PARTITION_LEFT((begin_), (end_) \ ST_SORT_INVOKE_ELEMENT_SIZE \ @@ -402,6 +408,48 @@ ST_INSERTION_SORT_PARTIAL(ST_POINTER_TYPE * begin, return true; } +static inline void +ST_INSERTION_SORT_UNGUARDED(ST_POINTER_TYPE * begin, + ST_POINTER_TYPE * end + ST_SORT_PROTO_ELEMENT_SIZE + ST_SORT_PROTO_COMPARE + ST_SORT_PROTO_ARG) +{ + ST_POINTER_TYPE *cur; + ST_POINTER_TYPE *sift; + ST_POINTER_TYPE *sift_1; + + if (begin == end) + return; + + for (cur = begin + ST_POINTER_STEP; cur < end; cur += ST_POINTER_STEP) + { +#ifndef ST_ELEMENT_TYPE_VOID + sift = cur; + sift_1 = cur - 1; + + if (DO_COMPARE(sift_1, sift) > 0) + { + ST_ELEMENT_TYPE tmp; + tmp = *sift; + + do + { + *sift-- = *sift_1; + sift_1--; /* Avoid multiple evaluation in DO_COMPARE */ + } while (DO_COMPARE(sift_1, &tmp) > 0); + + *sift = tmp; + } +#else + for (sift = cur, sift_1 = cur - ST_POINTER_STEP; + DO_COMPARE(sift_1, sift) > 0; + sift -= ST_POINTER_STEP, sift_1 -= ST_POINTER_STEP) + DO_SWAP(sift, sift_1); +#endif + } +} + // Partitions [begin, end) around pivot *begin. Elements equal // to the pivot are put in the right-hand partition. Returns the position of the pivot after // partitioning and whether the passed sequence was already correctly partitioned. Assumes the @@ -540,7 +588,11 @@ loop: n = (end - begin) / ST_POINTER_STEP; if (n < ST_THRESHOLD_INSERTION_SORT) { - DO_INSERTION_SORT(begin, end); + if (leftmost) + DO_INSERTION_SORT(begin, end); + else + DO_INSERTION_SORT_UNGUARDED(begin, end); + return; } @@ -658,6 +710,7 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n #undef DO_COMPARE #undef DO_INSERTION_SORT #undef DO_INSERTION_SORT_PARTIAL +#undef DO_INSERTION_SORT_UNGUARDED #undef DO_PARTITION_LEFT #undef DO_PARTITION_RIGHT #undef DO_SORT @@ -674,6 +727,7 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n #undef ST_ELEMENT_TYPE_VOID #undef ST_INSERTION_SORT #undef ST_INSERTION_SORT_PARTIAL +#undef ST_INSERTION_SORT_UNGUARDED #undef ST_MAKE_NAME #undef ST_MAKE_NAME_ #undef ST_MAKE_PREFIX diff --git a/src/test/regress/expected/geometry.out b/src/test/regress/expected/geometry.out index 975a1f5695..5eb0b29b16 100644 --- a/src/test/regress/expected/geometry.out +++ b/src/test/regress/expected/geometry.out @@ -4321,8 +4321,8 @@ SELECT c1.f1 AS circle, p1.f1 AS point, (p1.f1 <-> c1.f1) AS distance <(100,1),115> | (1e+300,Infinity) | Infinity <(100,1),115> | (Infinity,1e+300) | Infinity <(3,5),0> | (NaN,NaN) | NaN - <(1,2),3> | (NaN,NaN) | NaN <(5,1),3> | (NaN,NaN) | NaN + <(1,2),3> | (NaN,NaN) | NaN <(1,3),5> | (NaN,NaN) | NaN <(100,200),10> | (NaN,NaN) | NaN <(1,2),100> | (NaN,NaN) | NaN -- 2.35.1