[pruning the Cc: list and starting a new thread]

Here's the cleaned-up version of the patch to allow abbreviated keys
when sorting a single Datum. This also removes comments that suggest
that the caller of tuplesort_begin_datum should ever have to care
about the abbreviated key optimization.

I'll add this to the CF.

-- 
Andrew (irc:RhodiumToad)


diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 8079d97..08088ea 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -363,10 +363,6 @@ initialize_aggregates(AggState *aggstate,
 			 * We use a plain Datum sorter when there's a single input column;
 			 * otherwise sort the full tuple.  (See comments for
 			 * process_ordered_aggregate_single.)
-			 *
-			 * In the future, we should consider forcing the
-			 * tuplesort_begin_heap() case when the abbreviated key
-			 * optimization can thereby be used, even when numInputs is 1.
 			 */
 			peraggstate->sortstate =
 				(peraggstate->numInputs == 1) ?
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index f9a5f7f..39ed85b 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -268,10 +268,6 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 
 	/*
 	 * Initialize tuplesort object.
-	 *
-	 * In the future, we should consider forcing the tuplesort_begin_heap()
-	 * case when the abbreviated key optimization can thereby be used, even
-	 * when !use_tuples.
 	 */
 	if (use_tuples)
 		osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index b6e302f..1f506bf 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -147,13 +147,18 @@ bool		optimize_bounded_sort = true;
  * case where the first key determines the comparison result.  Note that
  * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
  *
+ * There is one special case: when the sort support infrastructure provides an
+ * "abbreviated key" representation, where the key is (typically) a pass by
+ * value proxy for a pass by reference type.  In this case, the abbreviated key
+ * is stored in datum1 in place of the actual first key column.
+ *
  * When sorting single Datums, the data value is represented directly by
- * datum1/isnull1.  If the datatype is pass-by-reference and isnull1 is false,
- * then datum1 points to a separately palloc'd data value that is also pointed
- * to by the "tuple" pointer; otherwise "tuple" is NULL.  There is one special
- * case:  when the sort support infrastructure provides an "abbreviated key"
- * representation, where the key is (typically) a pass by value proxy for a
- * pass by reference type.
+ * datum1/isnull1 for pass by value types (or null values).  If the datatype is
+ * pass-by-reference and isnull1 is false, then "tuple" points to a separately
+ * palloc'd data value, otherwise "tuple" is NULL.  The value of datum1 is then
+ * either the same pointer as "tuple", or is an abbreviated key value as
+ * described above.  Accordingly, "tuple" is always used in preference to
+ * datum1 as the authoritative value for pass-by-reference cases.
  *
  * While building initial runs, tupindex holds the tuple's run number.  During
  * merge passes, we re-use it to hold the input tape number that each tuple in
@@ -901,30 +906,36 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->copytup = copytup_datum;
 	state->writetup = writetup_datum;
 	state->readtup = readtup_datum;
+	state->abbrevNext = 10;
 
 	state->datumType = datumType;
 
-	/* Prepare SortSupport data */
-	state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData));
-
-	state->onlyKey->ssup_cxt = CurrentMemoryContext;
-	state->onlyKey->ssup_collation = sortCollation;
-	state->onlyKey->ssup_nulls_first = nullsFirstFlag;
-	/*
-	 * Conversion to abbreviated representation infeasible in the Datum case.
-	 * It must be possible to subsequently fetch original datum values within
-	 * tuplesort_getdatum(), which would require special-case preservation of
-	 * original values.
-	 */
-	state->onlyKey->abbreviate = false;
-
-	PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
-
 	/* lookup necessary attributes of the datum type */
 	get_typlenbyval(datumType, &typlen, &typbyval);
 	state->datumTypeLen = typlen;
 	state->datumTypeByVal = typbyval;
 
+	/* Prepare SortSupport data */
+	state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
+
+	state->sortKeys->ssup_cxt = CurrentMemoryContext;
+	state->sortKeys->ssup_collation = sortCollation;
+	state->sortKeys->ssup_nulls_first = nullsFirstFlag;
+
+	/* abbreviation is possible here only for by-reference types */
+	state->sortKeys->abbreviate = !typbyval;
+
+	PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
+
+	/*
+	 * The "onlyKey" optimization cannot be used with abbreviated keys, since
+	 * tie-breaker comparisons may be required.  Typically, the optimization is
+	 * only of value to pass-by-value types anyway, whereas abbreviated keys
+	 * are typically only of value to pass-by-reference types.
+	 */
+	if (!state->sortKeys->abbrev_converter)
+		state->onlyKey = state->sortKeys;
+
 	MemoryContextSwitchTo(oldcontext);
 
 	return state;
@@ -1307,9 +1318,17 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	SortTuple	stup;
 
 	/*
-	 * If it's a pass-by-reference value, copy it into memory we control, and
-	 * decrease availMem.  Then call the common code.
+	 * Pass-by-value types or null values are just stored directly in
+	 * stup.datum1 (and stup.tuple is not used and set to NULL).
+	 *
+	 * Non-null pass-by-reference values need to be copied into memory we
+	 * control, and possibly abbreviated. The copied value is pointed to by
+	 * stup.tuple and is treated as the canonical copy (e.g. to return via
+	 * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
+	 * abbreviated value if abbreviation is happening, otherwise it's identical
+	 * to stup.tuple.
 	 */
+
 	if (isNull || state->datumTypeByVal)
 	{
 		stup.datum1 = val;
@@ -1318,10 +1337,44 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	}
 	else
 	{
-		stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+		Datum	original = datumCopy(val, false, state->datumTypeLen);
+
 		stup.isnull1 = false;
-		stup.tuple = DatumGetPointer(stup.datum1);
+		stup.tuple = DatumGetPointer(original);
 		USEMEM(state, GetMemoryChunkSpace(stup.tuple));
+
+		if (!state->sortKeys->abbrev_converter)
+		{
+			stup.datum1 = original;
+		}
+		else if (!consider_abort_common(state))
+		{
+			/* Store abbreviated key representation */
+			stup.datum1 = state->sortKeys->abbrev_converter(original,
+															state->sortKeys);
+		}
+		else
+		{
+			/* Abort abbreviation */
+			int		i;
+
+			stup.datum1 = original;
+
+			/*
+			 * Set state to be consistent with never trying abbreviation.
+			 *
+			 * Alter datum1 representation in already-copied tuples, so as to
+			 * ensure a consistent representation (current tuple was just handled).
+			 * Note that we rely on all tuples copied so far actually being
+			 * contained within memtuples array.
+			 */
+			for (i = 0; i < state->memtupcount; i++)
+			{
+				SortTuple *mtup = &state->memtuples[i];
+
+				mtup->datum1 = PointerGetDatum(mtup->tuple);
+			}
+		}
 	}
 
 	puttuple_common(state, &stup);
@@ -1886,10 +1939,12 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
 	}
 	else
 	{
+		/* use stup.tuple because stup.datum1 may be an abbreviation */
+
 		if (should_free)
-			*val = stup.datum1;
+			*val = PointerGetDatum(stup.tuple);
 		else
-			*val = datumCopy(stup.datum1, false, state->datumTypeLen);
+			*val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen);
 		*isNull = false;
 	}
 
@@ -3712,9 +3767,24 @@ readtup_index(Tuplesortstate *state, SortTuple *stup,
 static int
 comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 {
-	return ApplySortComparator(a->datum1, a->isnull1,
-							   b->datum1, b->isnull1,
-							   state->onlyKey);
+	int		compare;
+
+	compare = ApplySortComparator(a->datum1, a->isnull1,
+								  b->datum1, b->isnull1,
+								  state->sortKeys);
+	if (compare != 0)
+		return compare;
+
+	/* if we have abbreviations, then "tuple" has the original value */
+
+	if (state->sortKeys->abbrev_converter)
+	{
+		compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
+												PointerGetDatum(b->tuple), b->isnull1,
+												state->sortKeys);
+	}
+
+	return compare;
 }
 
 static void
@@ -3743,8 +3813,8 @@ writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup)
 	}
 	else
 	{
-		waddr = DatumGetPointer(stup->datum1);
-		tuplen = datumGetSize(stup->datum1, false, state->datumTypeLen);
+		waddr = stup->tuple;
+		tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen);
 		Assert(tuplen != 0);
 	}
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to