Re: [HACKERS] Re: Abbreviated keys for Datum tuplesort

Peter Geoghegan Fri, 13 Mar 2015 17:56:26 -0700

On Sun, Jan 25, 2015 at 3:15 AM, Andrew Gierth
<and...@tao11.riddles.org.uk> wrote:
>  Robert> I think this is a good idea.  Do you have a test case that
>  Robert> shows the benefit?
>
> The best test case for datum sort performance is to use percentile_disc,
> since that has almost no overhead beyond performing the actual sort.


I attach a slightly tweaked version of Andrew's original.

This revision makes the reverted comments within orderedsetaggs.c
consistent with back branches (no need to call abbreviation out as an
interesting special case anymore, just as in the back branches, where
abbreviation doesn't even exist). Better to keep those consistent for
backpatching purposes. Also, I've changed back tuplesort.c header
comments to how they were back in November and until recently, to
reflect that now it really is the case that only the hash index case
doesn't have the "sortKeys" field reliably set. We now need to set
"sortKeys" for the datum case, so don't say that we don't...we need to
worry about the applicability of the onlyKey optimization for the
datum sort case now, too.

Other than that, there are a number of minor stylistic tweaks. The
datum case does not support pass-by-value abbreviation, which could be
useful in theory (e.g., abbreviation of float8 values, which was
briefly discussed before). This isn't worth calling out as a special
case in the tuplesort header comments IMV; there is now a brief note
on this added to tuplesort_begin_datum(). We still support
abbreviation for pass-by-value types for non-datumsort cases (there is
of course no known example of opclass abbreviation support for a
pass-by-value type, so this is only a theoretical deficiency).

I've marked this "ready for committer".

Thanks
-- 
Peter Geoghegan

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 9ff0eff..ee13fc0 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -363,10 +363,6 @@ initialize_aggregates(AggState *aggstate,
 			 * We use a plain Datum sorter when there's a single input column;
 			 * otherwise sort the full tuple.  (See comments for
 			 * process_ordered_aggregate_single.)
-			 *
-			 * In the future, we should consider forcing the
-			 * tuplesort_begin_heap() case when the abbreviated key
-			 * optimization can thereby be used, even when numInputs is 1.
 			 */
 			peraggstate->sortstate =
 				(peraggstate->numInputs == 1) ?
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index f9a5f7f..869a83b 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -266,13 +266,7 @@ ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
 	osastate->qstate = qstate;
 	osastate->gcontext = gcontext;
 
-	/*
-	 * Initialize tuplesort object.
-	 *
-	 * In the future, we should consider forcing the tuplesort_begin_heap()
-	 * case when the abbreviated key optimization can thereby be used, even
-	 * when !use_tuples.
-	 */
+	/* Initialize tuplesort object */
 	if (use_tuples)
 		osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,
 												   qstate->numSortCols,
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 8ad5635..92f0355 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -336,9 +336,9 @@ struct Tuplesortstate
 	bool		markpos_eof;	/* saved "eof_reached" */
 
 	/*
-	 * The sortKeys variable is used by every case other than the datum and
-	 * hash index cases; it is set by tuplesort_begin_xxx.  tupDesc is only
-	 * used by the MinimalTuple and CLUSTER routines, though.
+	 * The sortKeys variable is used by every case other than the hash index
+	 * case; it is set by tuplesort_begin_xxx.  tupDesc is only used by the
+	 * MinimalTuple and CLUSTER routines, though.
 	 */
 	TupleDesc	tupDesc;
 	SortSupport sortKeys;		/* array of length nKeys */
@@ -901,29 +901,41 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->copytup = copytup_datum;
 	state->writetup = writetup_datum;
 	state->readtup = readtup_datum;
+	state->abbrevNext = 10;
 
 	state->datumType = datumType;
 
+	/* lookup necessary attributes of the datum type */
+	get_typlenbyval(datumType, &typlen, &typbyval);
+	state->datumTypeLen = typlen;
+	state->datumTypeByVal = typbyval;
+
 	/* Prepare SortSupport data */
-	state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData));
+	state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
+
+	state->sortKeys->ssup_cxt = CurrentMemoryContext;
+	state->sortKeys->ssup_collation = sortCollation;
+	state->sortKeys->ssup_nulls_first = nullsFirstFlag;
 
-	state->onlyKey->ssup_cxt = CurrentMemoryContext;
-	state->onlyKey->ssup_collation = sortCollation;
-	state->onlyKey->ssup_nulls_first = nullsFirstFlag;
 	/*
-	 * Conversion to abbreviated representation infeasible in the Datum case.
-	 * It must be possible to subsequently fetch original datum values within
-	 * tuplesort_getdatum(), which would require special-case preservation of
-	 * original values.
+	 * For the datum case, abbreviation is used only for pass-by-reference
+	 * types (in principle abbreviation can be applied for pass-by-value types
+	 * elsewhere).  It's convenient to assume that in the pass-by-value datum
+	 * case, ssup.tuple is always NULL.  There is no known example of
+	 * pass-by-value types that offer abbreviation, so this seems fine.
 	 */
-	state->onlyKey->abbreviate = false;
+	state->sortKeys->abbreviate = !typbyval;
 
-	PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);
+	PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
 
-	/* lookup necessary attributes of the datum type */
-	get_typlenbyval(datumType, &typlen, &typbyval);
-	state->datumTypeLen = typlen;
-	state->datumTypeByVal = typbyval;
+	/*
+	 * The "onlyKey" optimization cannot be used with abbreviated keys, since
+	 * tie-breaker comparisons may be required.  Typically, the optimization is
+	 * only of value to pass-by-value types anyway, whereas abbreviated keys
+	 * are typically only of value to pass-by-reference types.
+	 */
+	if (!state->sortKeys->abbrev_converter)
+		state->onlyKey = state->sortKeys;
 
 	MemoryContextSwitchTo(oldcontext);
 
@@ -1307,8 +1319,16 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	SortTuple	stup;
 
 	/*
-	 * If it's a pass-by-reference value, copy it into memory we control, and
-	 * decrease availMem.  Then call the common code.
+	 * Pass-by-value types or null values are just stored directly in
+	 * stup.datum1 (and stup.tuple is not used and set to NULL).
+	 *
+	 * Non-null pass-by-reference values need to be copied into memory we
+	 * control, and possibly abbreviated.  We decrease availMem as appropriate,
+	 * either way.  The copied value is reliably pointed to by stup.tuple and
+	 * is treated as the canonical copy (e.g. it is returned by
+	 * tuplesort_getdatum, or when writing to tape);  stup.datum1 stores
+	 * abbreviated values when abbreviation is in play.  Otherwise, it's
+	 * identical to stup.tuple.
 	 */
 	if (isNull || state->datumTypeByVal)
 	{
@@ -1318,10 +1338,43 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 	}
 	else
 	{
-		stup.datum1 = datumCopy(val, false, state->datumTypeLen);
+		Datum	original = datumCopy(val, false, state->datumTypeLen);
 		stup.isnull1 = false;
-		stup.tuple = DatumGetPointer(stup.datum1);
+		stup.tuple = DatumGetPointer(original);
 		USEMEM(state, GetMemoryChunkSpace(stup.tuple));
+
+		if (!state->sortKeys->abbrev_converter)
+		{
+			stup.datum1 = original;
+		}
+		else if (!consider_abort_common(state))
+		{
+			/* Store abbreviated key representation */
+			stup.datum1 = state->sortKeys->abbrev_converter(original,
+															state->sortKeys);
+		}
+		else
+		{
+			/* Abort abbreviation */
+			int		i;
+
+			stup.datum1 = original;
+
+			/*
+			 * Set state to be consistent with never trying abbreviation.
+			 *
+			 * Alter datum1 representation in already-copied tuples, so as to
+			 * ensure a consistent representation (current tuple was just handled).
+			 * Note that we rely on all tuples copied so far actually being
+			 * contained within memtuples array.
+			 */
+			for (i = 0; i < state->memtupcount; i++)
+			{
+				SortTuple *mtup = &state->memtuples[i];
+
+				mtup->datum1 = PointerGetDatum(mtup->tuple);
+			}
+		}
 	}
 
 	puttuple_common(state, &stup);
@@ -1886,10 +1939,12 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
 	}
 	else
 	{
+		/* use stup.tuple;  stup.datum1 may store an abbreviated key */
 		if (should_free)
-			*val = stup.datum1;
+			*val = PointerGetDatum(stup.tuple);
 		else
-			*val = datumCopy(stup.datum1, false, state->datumTypeLen);
+			*val = datumCopy(PointerGetDatum(stup.tuple), false,
+							 state->datumTypeLen);
 		*isNull = false;
 	}
 
@@ -3715,9 +3770,23 @@ readtup_index(Tuplesortstate *state, SortTuple *stup,
 static int
 comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 {
-	return ApplySortComparator(a->datum1, a->isnull1,
-							   b->datum1, b->isnull1,
-							   state->onlyKey);
+	int32		compare;
+
+	compare = ApplySortComparator(a->datum1, a->isnull1,
+								  b->datum1, b->isnull1,
+								  state->sortKeys);
+	if (compare != 0)
+		return compare;
+
+	/* When using abbreviation, "tuple" stores original/authoritative value */
+	if (state->sortKeys->abbrev_converter)
+		return ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple),
+											 a->isnull1,
+											 PointerGetDatum(b->tuple),
+											 b->isnull1,
+											 state->sortKeys);
+
+	return 0;
 }
 
 static void
@@ -3746,8 +3815,9 @@ writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup)
 	}
 	else
 	{
-		waddr = DatumGetPointer(stup->datum1);
-		tuplen = datumGetSize(stup->datum1, false, state->datumTypeLen);
+		waddr = stup->tuple;
+		tuplen = datumGetSize(PointerGetDatum(stup->tuple), false,
+							  state->datumTypeLen);
 		Assert(tuplen != 0);
 	}

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Re: Abbreviated keys for Datum tuplesort

Reply via email to