On Mon, Oct 12, 2015 at 12:31 PM, Peter Geoghegan <p...@heroku.com> wrote:
> I'll consider a more comprehensive fix.

I attach a revised fix that considers the problem of misinterpreting
the contents of the buffers in both directions.

Thanks
-- 
Peter Geoghegan
From c2d4558df3ce93b939fa319dd6e0735400833df0 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <peter.geoghega...@gmail.com>
Date: Mon, 12 Oct 2015 17:13:28 -0700
Subject: [PATCH] Correctly distinguish the contents of text buffers

Avoid confusing an original string with a strxfrm() buffer, or
vice-versa.  This could previously occur in the event of interleaving of
authoritative comparisons with conversions to abbreviated keys.  Such
interleaving is possible with external sorts.
---
 src/backend/utils/adt/varlena.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d545c34..c8574f0 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -64,7 +64,7 @@ typedef struct
 	int			buflen2;
 	int			last_len1;		/* Length of last buf1 string/strxfrm() blob */
 	int			last_len2;		/* Length of last buf2 string/strxfrm() blob */
-	int			last_returned;	/* Last comparison result (cache) */
+	int			last_returned;	/* Last comparison result/caching sentinel */
 	bool		collate_c;
 	hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
 	hyperLogLogState full_card; /* Full key cardinality state */
@@ -1843,10 +1843,17 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 #endif
 		/*
 		 * To avoid somehow confusing a strxfrm() blob and an original string
-		 * within bttextfastcmp_locale(), initialize last returned cache to a
+		 * within bttextfastcmp_locale(), initialize last_returned cache to a
 		 * sentinel value.  A platform-specific actual strcoll() return value
-		 * of INT_MIN seems unlikely, but if that occurs it will not cause
-		 * wrong answers.
+		 * of INT_MIN seems unlikely, but will be immediately changed to -1 to
+		 * remove the possibility of wrong answers.
+		 *
+		 * Comparisons that attempt to reuse last_returned may be interleaved
+		 * with conversion calls.  Frequently, conversions and comparisons are
+		 * batched into two distinct phases, but the correctness of caching
+		 * cannot hinge upon this.  For comparison caching we only trust the
+		 * cache when last_returned != INT_MIN, while for strxfrm() caching we
+		 * only trust the cache when last_returned == INT_MIN.
 		 */
 		tss->last_returned = INT_MIN;
 		tss->collate_c = collate_c;
@@ -1931,9 +1938,9 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
 	{
 		/*
-		 * No change in buf1 or buf2 contents, so avoid changing last_len1 or
-		 * last_len2.  Existing contents of buffers might still be used by next
-		 * call.
+		 * No change in buf1 or buf2 contents, so avoid changing last_len1,
+		 * last_len2, or last_returned.  Existing contents of buffers might
+		 * still be used by next call.
 		 */
 		result = 0;
 		goto done;
@@ -2005,7 +2012,12 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 	if (result == 0)
 		result = strcmp(tss->buf1, tss->buf2);
 
-	/* Cache result, perhaps saving an expensive strcoll() call next time */
+	/*
+	 * Cache result, perhaps saving an expensive strcoll() call next time.
+	 * Interpret a strcoll() return value that happens to be the sentinel value
+	 * INT_MIN as -1.
+	 */
+	result = Max(-1, result);
 	tss->last_returned = result;
 done:
 	/* We can't afford to leak memory here. */
@@ -2091,6 +2103,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 		/* Might be able to reuse strxfrm() blob from last call */
 		if (tss->last_len1 == len &&
+			tss->last_returned == INT_MIN &&
 			memcmp(tss->buf1, authoritative_data, len) == 0)
 		{
 			memcpy(pres, tss->buf2, Min(sizeof(Datum), tss->last_len2));
@@ -2173,6 +2186,12 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 done:
 	/*
+	 * Set last_returned to sentinel value to indicate that buffers store
+	 * strxfrm() original string and blob
+	 */
+	tss->last_returned = INT_MIN;
+
+	/*
 	 * Byteswap on little-endian machines.
 	 *
 	 * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to