Hi,

David Rowley privately reported a performance regression when sorting
single ints with a lot of duplicates, in a case that previously hit
qsort_ssup() but now hits qsort_tuple_int32() and then has to call the
tiebreaker comparator.  Note that this comes up only for sorts in a
query, not for eg index builds which always have to tiebreak on item
ptr.  I don't have data right now but that'd likely be due to:

+ * XXX: For now, there is no specialization for cases where datum1 is
+ * authoritative and we don't even need to fall back to a callback at all (that
+ * would be true for types like int4/int8/timestamp/date, but not true for
+ * abbreviations of text or multi-key sorts.  There could be!  Is it worth it?

Upthread we were discussing which variations it'd be worth investing
extra text segment space on to gain speedup and we put those hard
decisions off for future work, but on reflection, we probably should
tackle this particular point to avoid a regression.  I think something
like the attached achieves that (draft, not tested much yet, could
perhaps find a tidier way to code the decision tree).  In short:
variants qsort_tuple_{int32,signed,unsigned}() no longer fall back,
but new variants qsort_tuple_{int32,signed,unsigned}_tiebreak() do.

We should perhaps also reconsider the other XXX comment about finding
a way to skip the retest of column 1 in the tiebreak comparator.
Perhaps you'd just install a different comparetup function, eg
comparetup_index_btree_tail (which would sharing code), so no need to
multiply specialisations for that.

Planning to look at this more closely after I've sorted out some other
problems, but thought I'd post this draft/problem report early in case
John or others have thoughts or would like to run some experiments.
From 3ff95aedb1065393a0ae1496cedb463bc998a329 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 6 Apr 2022 11:38:31 +1200
Subject: [PATCH] Specialize tuplesort for keys without tiebreaker.

Commit 69749243 noted a future opportunity to avoid calling the
comparator function in the case of single column non-abbreviated sort,
where datum1 alone is enough to decide the order.

David Rowley reported a performance regression in single-column integer
sorts with a lot of duplicates, where previously the qsort_ssup()
specialization was reached, due to extra calls to the comparator.  It
seems like a good idea to fix that, so let's complete that TODO item
now.

XXX WIP, is the if-then-else getting too ugly, maybe switch to a table of
functions to search?
XXX needs testing, experiments
---
 src/backend/utils/sort/tuplesort.c | 110 +++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 21 deletions(-)

diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 571fb95532..dc16e6c242 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -312,6 +312,14 @@ struct Tuplesortstate
 	 */
 	bool		haveDatum1;
 
+	/*
+	 * Whether the comparator function must always be called for tuples with
+	 * equal datum1.  This should be set to true if the sorting has extra
+	 * 'invisible' sort order beyond the regular keys, to disable optimizations
+	 * that would otherwise skip the function call.
+	 */
+	bool		alwaysTiebreak;
+
 	/*
 	 * This array holds the tuples now in sort memory.  If we are in state
 	 * INITIAL, the tuples are in no particular order; if we are in state
@@ -682,11 +690,6 @@ static void tuplesort_updatemax(Tuplesortstate *state);
  *
  * XXX: For now, these fall back to comparator functions that will compare the
  * leading datum a second time.
- *
- * XXX: For now, there is no specialization for cases where datum1 is
- * authoritative and we don't even need to fall back to a callback at all (that
- * would be true for types like int4/int8/timestamp/date, but not true for
- * abbreviations of text or multi-key sorts.  There could be!  Is it worth it?
  */
 
 /* Used if first key's comparator is ssup_datum_unsigned_compare */
@@ -740,10 +743,12 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
  * qsort_ssup() is specialized for the case where the comparetup function
  * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts
  * and Datum sorts.  qsort_tuple_{unsigned,signed,int32} are specialized for
- * common comparison functions on pass-by-value leading datums.
+ * common comparison functions on pass-by-value leading datums.  The _tiebreak
+ * variants are for when it is necessary to call the comparator function even
+ * if datum1 is equal.
  */
 
-#define ST_SORT qsort_tuple_unsigned
+#define ST_SORT qsort_tuple_unsigned_tiebreak
 #define ST_ELEMENT_TYPE SortTuple
 #define ST_COMPARE(a, b, state) qsort_tuple_unsigned_compare(a, b, state)
 #define ST_COMPARE_ARG_TYPE Tuplesortstate
@@ -752,7 +757,7 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 #define ST_DEFINE
 #include "lib/sort_template.h"
 
-#define ST_SORT qsort_tuple_signed
+#define ST_SORT qsort_tuple_signed_tiebreak
 #define ST_ELEMENT_TYPE SortTuple
 #define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state)
 #define ST_COMPARE_ARG_TYPE Tuplesortstate
@@ -761,7 +766,7 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 #define ST_DEFINE
 #include "lib/sort_template.h"
 
-#define ST_SORT qsort_tuple_int32
+#define ST_SORT qsort_tuple_int32_tiebreak
 #define ST_ELEMENT_TYPE SortTuple
 #define ST_COMPARE(a, b, state) qsort_tuple_int32_compare(a, b, state)
 #define ST_COMPARE_ARG_TYPE Tuplesortstate
@@ -770,6 +775,33 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state)
 #define ST_DEFINE
 #include "lib/sort_template.h"
 
+#define ST_SORT qsort_tuple_unsigned
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE(a, b, state) ApplyUnsignedSortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, &state->sortKeys[0])
+#define ST_COMPARE_ARG_TYPE Tuplesortstate
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+#define ST_SORT qsort_tuple_signed
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE(a, b, state) ApplySignedSortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, &state->sortKeys[0])
+#define ST_COMPARE_ARG_TYPE Tuplesortstate
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+#define ST_SORT qsort_tuple_int32
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE(a, b, state) ApplyInt32SortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, &state->sortKeys[0])
+#define ST_COMPARE_ARG_TYPE Tuplesortstate
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
 #define ST_SORT qsort_tuple
 #define ST_ELEMENT_TYPE SortTuple
 #define ST_COMPARE_RUNTIME_POINTER
@@ -1215,6 +1247,12 @@ tuplesort_begin_index_btree(Relation heapRel,
 	state->abbrevNext = 10;
 	state->haveDatum1 = true;
 
+	/*
+	 * Since we sort equal keys by ItemPointer, we can't skip the comparator
+	 * call.
+	 */
+	state->alwaysTiebreak = true;
+
 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
 	state->enforceUnique = enforceUnique;
@@ -3632,28 +3670,58 @@ tuplesort_sort_memtuples(Tuplesortstate *state)
 		 */
 		if (state->haveDatum1 && state->sortKeys)
 		{
+			bool		tiebreak;
+
+			/*
+			 * If there's only a single key, and the value in datum1 is not a
+			 * lossy abbreviation, then we don't need to call a tie-breaking
+			 * comparator function for equal datum1 values unless explicitly
+			 * asked to do so (eg btrees).
+			 */
+			tiebreak = state->nKeys > 1 ||
+				state->sortKeys[0].abbrev_converter ||
+				state->alwaysTiebreak;
+
 			if (state->sortKeys[0].comparator == ssup_datum_unsigned_cmp)
 			{
-				elog(DEBUG1, "qsort_tuple_unsigned");
-				qsort_tuple_unsigned(state->memtuples,
-									 state->memtupcount,
-									 state);
+				elog(DEBUG1, "qsort_tuple_unsigned%s",
+					 tiebreak ? "_tiebreak" : "");
+				if (tiebreak)
+					qsort_tuple_unsigned_tiebreak(state->memtuples,
+												  state->memtupcount,
+												  state);
+				else
+					qsort_tuple_unsigned(state->memtuples,
+										 state->memtupcount,
+										 state);
 				return;
 			}
 			else if (state->sortKeys[0].comparator == ssup_datum_signed_cmp)
 			{
-				elog(DEBUG1, "qsort_tuple_signed");
-				qsort_tuple_signed(state->memtuples,
-								   state->memtupcount,
-								   state);
+				elog(DEBUG1, "qsort_tuple_signed%s",
+					 tiebreak ? "_tiebreak" : "");
+				if (tiebreak)
+					qsort_tuple_signed_tiebreak(state->memtuples,
+												state->memtupcount,
+												state);
+				else
+					qsort_tuple_signed(state->memtuples,
+									   state->memtupcount,
+									   state);
 				return;
 			}
 			else if (state->sortKeys[0].comparator == ssup_datum_int32_cmp)
 			{
-				elog(DEBUG1, "qsort_tuple_int32");
-				qsort_tuple_int32(state->memtuples,
-								  state->memtupcount,
-								  state);
+				elog(DEBUG1, "qsort_tuple_int32%s",
+					 tiebreak ? "_tiebreak" : "");
+				if (tiebreak)
+					qsort_tuple_int32_tiebreak(state->memtuples,
+											   state->memtupcount,
+											   state);
+				else
+					qsort_tuple_int32(state->memtuples,
+									  state->memtupcount,
+									  state);
 				return;
 			}
 		}
-- 
2.30.2

Reply via email to