On Thu, Oct 15, 2015 at 9:07 AM, Robert Haas <robertmh...@gmail.com> wrote:
> Would you be willing to try coding it up that way so we can see how it looks?

I attach a patch that does it that way.

Note that I will be away for until late this month. Do not expect a
response from me before then, unless it's truly urgent.

Thanks

-- 
Peter Geoghegan
From 22936e4d69e74e8e744053024d7feea63d7cea56 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <peter.geoghega...@gmail.com>
Date: Mon, 12 Oct 2015 17:13:28 -0700
Subject: [PATCH] Correctly distinguish the contents of text buffers

Avoid confusing an original string with a strxfrm() buffer, or
vice-versa.  This could previously occur in the event of interleaving of
authoritative comparisons with conversions to abbreviated keys.  Such
interleaving is possible with external sorts.
---
 src/backend/utils/adt/varlena.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d545c34..83523f7 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -65,6 +65,7 @@ typedef struct
 	int			last_len1;		/* Length of last buf1 string/strxfrm() blob */
 	int			last_len2;		/* Length of last buf2 string/strxfrm() blob */
 	int			last_returned;	/* Last comparison result (cache) */
+	bool		cache_blob;		/* Does buf2 contain strxfrm() blob, etc? */
 	bool		collate_c;
 	hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
 	hyperLogLogState full_card; /* Full key cardinality state */
@@ -1838,17 +1839,26 @@ btsortsupport_worker(SortSupport ssup, Oid collid)
 		/* Start with invalid values */
 		tss->last_len1 = -1;
 		tss->last_len2 = -1;
+		/* Initialize */
+		tss->last_returned = 0;
 #ifdef HAVE_LOCALE_T
 		tss->locale = locale;
 #endif
 		/*
-		 * To avoid somehow confusing a strxfrm() blob and an original string
-		 * within bttextfastcmp_locale(), initialize last returned cache to a
-		 * sentinel value.  A platform-specific actual strcoll() return value
-		 * of INT_MIN seems unlikely, but if that occurs it will not cause
-		 * wrong answers.
+		 * To avoid somehow confusing a strxfrm() blob and an original string,
+		 * constantly keep track of the variety of data that buf1 and buf2
+		 * currently contain.
+		 *
+		 * Comparisons may be interleaved with conversion calls.  Frequently,
+		 * conversions and comparisons are batched into two distinct phases,
+		 * but the correctness of caching cannot hinge upon this.  For
+		 * comparison caching, buffer state is only trusted if cache_blob is
+		 * found set to false, whereas strxfrm() caching only trusts the state
+		 * when cache_blob is found set to true.
+		 *
+		 * Arbitrarily initialize cache_blob to true.
 		 */
-		tss->last_returned = INT_MIN;
+		tss->cache_blob = true;
 		tss->collate_c = collate_c;
 		ssup->ssup_extra = tss;
 
@@ -1983,7 +1993,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 		tss->buf2[len2] = '\0';
 		tss->last_len2 = len2;
 	}
-	else if (arg1_match && tss->last_returned != INT_MIN)
+	else if (arg1_match && !tss->cache_blob)
 	{
 		/* Use result cached following last actual strcoll() call */
 		result = tss->last_returned;
@@ -2006,6 +2016,7 @@ bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
 		result = strcmp(tss->buf1, tss->buf2);
 
 	/* Cache result, perhaps saving an expensive strcoll() call next time */
+	tss->cache_blob = false;
 	tss->last_returned = result;
 done:
 	/* We can't afford to leak memory here. */
@@ -2090,7 +2101,7 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 		}
 
 		/* Might be able to reuse strxfrm() blob from last call */
-		if (tss->last_len1 == len &&
+		if (tss->last_len1 == len && tss->cache_blob &&
 			memcmp(tss->buf1, authoritative_data, len) == 0)
 		{
 			memcpy(pres, tss->buf2, Min(sizeof(Datum), tss->last_len2));
@@ -2171,6 +2182,8 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 	addHyperLogLog(&tss->abbr_card, hash);
 
+	/* Cache result, perhaps saving an expensive strxfrm() call next time */
+	tss->cache_blob = true;
 done:
 	/*
 	 * Byteswap on little-endian machines.
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to