On Thu, Aug 25, 2022 at 5:55 PM Benjamin Coutu <ben.co...@zeyos.com> wrote:
>
> Hello,
>
> Inspired by the recent discussions[1][2] around sort improvements, I took a 
> look around the code and noticed the use of a somewhat naive version of 
> insertion sort within the broader quicksort code.
>
> The current implementation (see sort_template.h) is practically the textbook 
> version of insertion sort:

Right. I think it's worth looking into. When I tested dual-pivot
quicksort, a threshold of 18 seemed best for my inputs, so making
insertion sort more efficient could tilt the balance more in favor of
dual-pivot. (It already seems slightly ahead, but as I mentioned in
the thread you linked, removing branches for null handling would make
it more compelling).

> DO_ASSIGN and DO_COPY macros would have to be declared analogue to DO_SWAP 
> via the template.

I don't think you need these macros, since this optimization is only
convenient if you know the type at compile time. See the attached,
which I had laying around when I was looking at PDQuicksort. I believe
it's similar to what you have in mind. (Ignore the part about
"unguarded", it's irrelevant out of context). Notice that it avoids
unnecessary copies, but has two calls to DO_COMPARE, which is *not*
small for tuples.

> Anyways, there might be some low hanging fruit here. If it turns out to be 
> significantly faster one might even consider increasing the insertion sort 
> threshold from < 7 to < 10 sort elements. But that is a whole other 
> discussion for another day.

I think it belongs around 10 now anyway. I also don't think you'll see
much benefit with your proposal at a threshold of 7 -- I suspect it'd
be more enlightening to test a range of thresholds with and without
the patch, to see how the inflection point shifts. That worked pretty
well when testing dual-pivot.

-- 
John Naylor
EDB: http://www.enterprisedb.com
From fd4bfdbee88478fac32838712c112d91f73c5db5 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 4 May 2022 23:35:06 +0700
Subject: [PATCH v3 8/8] Use unguarded insertion sort where possible

Since we recurse to the partitions in order, guarding is only
needed at the very beginning of the input. It's not yet clear
if this is a performance win for sort keys with complex
comparators.
---
 src/include/lib/sort_template.h        | 58 +++++++++++++++++++++++++-
 src/test/regress/expected/geometry.out |  2 +-
 2 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/src/include/lib/sort_template.h b/src/include/lib/sort_template.h
index 3b3c4bc128..ae76fd39e8 100644
--- a/src/include/lib/sort_template.h
+++ b/src/include/lib/sort_template.h
@@ -79,7 +79,6 @@
  *		https://github.com/orlp/pdqsort
  *
  * WIP: Not yet implemented:
- *  - use unguarded insertion sort where possible
  *  - fall back to heap sort to guard the stack
  *
  * Other techniques used in PDQuicksort, but likely not worth adopting:
@@ -174,6 +173,7 @@ ST_SCOPE void ST_SORT(ST_ELEMENT_TYPE * first, size_t n
 /* sort private helper functions */
 #define ST_INSERTION_SORT ST_MAKE_NAME(ST_SORT, insertion_sort)
 #define ST_INSERTION_SORT_PARTIAL ST_MAKE_NAME(ST_SORT, insertion_sort_partial)
+#define ST_INSERTION_SORT_UNGUARDED ST_MAKE_NAME(ST_SORT, insertion_sort_unguarded)
 #define ST_PARTITION_LEFT ST_MAKE_NAME(ST_SORT, partition_left)
 #define ST_PARTITION_RIGHT ST_MAKE_NAME(ST_SORT, partition_right)
 #define ST_SORT_INTERNAL ST_MAKE_NAME(ST_SORT, internal)
@@ -213,6 +213,12 @@ ST_SCOPE void ST_SORT(ST_ELEMENT_TYPE * first, size_t n
 			ST_SORT_INVOKE_COMPARE										\
 			ST_SORT_INVOKE_ARG)
 
+#define DO_INSERTION_SORT_UNGUARDED(begin_, end_)						\
+	ST_INSERTION_SORT_UNGUARDED((begin_), (end_)						\
+			ST_SORT_INVOKE_ELEMENT_SIZE									\
+			ST_SORT_INVOKE_COMPARE										\
+			ST_SORT_INVOKE_ARG)
+
 #define DO_PARTITION_LEFT(begin_, end_)									\
 	ST_PARTITION_LEFT((begin_), (end_)									\
 			ST_SORT_INVOKE_ELEMENT_SIZE									\
@@ -402,6 +408,48 @@ ST_INSERTION_SORT_PARTIAL(ST_POINTER_TYPE * begin,
 	return true;
 }
 
+static inline void
+ST_INSERTION_SORT_UNGUARDED(ST_POINTER_TYPE * begin,
+				  ST_POINTER_TYPE * end
+				  ST_SORT_PROTO_ELEMENT_SIZE
+				  ST_SORT_PROTO_COMPARE
+				  ST_SORT_PROTO_ARG)
+{
+	ST_POINTER_TYPE *cur;
+	ST_POINTER_TYPE *sift;
+	ST_POINTER_TYPE *sift_1;
+
+	if (begin == end)
+		return;
+
+	for (cur = begin + ST_POINTER_STEP; cur < end; cur += ST_POINTER_STEP)
+	{
+#ifndef ST_ELEMENT_TYPE_VOID
+		sift = cur;
+		sift_1 = cur - 1;
+
+		if (DO_COMPARE(sift_1, sift) > 0)
+		{
+			ST_ELEMENT_TYPE tmp;
+			tmp = *sift;
+
+			do
+			{
+				*sift-- = *sift_1;
+				sift_1--; /* Avoid multiple evaluation in DO_COMPARE */
+			} while (DO_COMPARE(sift_1, &tmp) > 0);
+
+			*sift = tmp;
+		}
+#else
+		for (sift = cur, sift_1 = cur - ST_POINTER_STEP;
+			 DO_COMPARE(sift_1, sift) > 0;
+			 sift -= ST_POINTER_STEP, sift_1 -= ST_POINTER_STEP)
+			DO_SWAP(sift, sift_1);
+#endif
+	}
+}
+
 // Partitions [begin, end) around pivot *begin. Elements equal
 // to the pivot are put in the right-hand partition. Returns the position of the pivot after
 // partitioning and whether the passed sequence was already correctly partitioned. Assumes the
@@ -540,7 +588,11 @@ loop:
 	n = (end - begin) / ST_POINTER_STEP;
 	if (n < ST_THRESHOLD_INSERTION_SORT)
 	{
-		DO_INSERTION_SORT(begin, end);
+		if (leftmost)
+			DO_INSERTION_SORT(begin, end);
+		else
+			DO_INSERTION_SORT_UNGUARDED(begin, end);
+
 		return;
 	}
 
@@ -658,6 +710,7 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n
 #undef DO_COMPARE
 #undef DO_INSERTION_SORT
 #undef DO_INSERTION_SORT_PARTIAL
+#undef DO_INSERTION_SORT_UNGUARDED
 #undef DO_PARTITION_LEFT
 #undef DO_PARTITION_RIGHT
 #undef DO_SORT
@@ -674,6 +727,7 @@ ST_SORT(ST_ELEMENT_TYPE * data, size_t n
 #undef ST_ELEMENT_TYPE_VOID
 #undef ST_INSERTION_SORT
 #undef ST_INSERTION_SORT_PARTIAL
+#undef ST_INSERTION_SORT_UNGUARDED
 #undef ST_MAKE_NAME
 #undef ST_MAKE_NAME_
 #undef ST_MAKE_PREFIX
diff --git a/src/test/regress/expected/geometry.out b/src/test/regress/expected/geometry.out
index 975a1f5695..5eb0b29b16 100644
--- a/src/test/regress/expected/geometry.out
+++ b/src/test/regress/expected/geometry.out
@@ -4321,8 +4321,8 @@ SELECT c1.f1 AS circle, p1.f1 AS point, (p1.f1 <-> c1.f1) AS distance
  <(100,1),115>  | (1e+300,Infinity) |      Infinity
  <(100,1),115>  | (Infinity,1e+300) |      Infinity
  <(3,5),0>      | (NaN,NaN)         |           NaN
- <(1,2),3>      | (NaN,NaN)         |           NaN
  <(5,1),3>      | (NaN,NaN)         |           NaN
+ <(1,2),3>      | (NaN,NaN)         |           NaN
  <(1,3),5>      | (NaN,NaN)         |           NaN
  <(100,200),10> | (NaN,NaN)         |           NaN
  <(1,2),100>    | (NaN,NaN)         |           NaN
-- 
2.35.1

Reply via email to