From d3cd945e2571aea730b7fc4285a1685e905375fc Mon Sep 17 00:00:00 2001
From: Robert Haas <rhaas@postgresql.org>
Date: Fri, 8 Nov 2019 10:24:34 -0500
Subject: [PATCH 1/2] Code cleanup for toast_fetch_datum and
 toast_fetch_datum_slice.

Rework some of the checks for bad TOAST chunks to be a bit simpler
and easier to understand. These checks verify that (1) we get all
and only the chunk numbers we expect to see and (2) each chunk has
the expected size. However, the existing code was a bit hard to
understand, at least for me; try to make it clearer.

As part of that, have toast_fetch_datum_slice check the relationship
between endchunk and totalchunks only with an Assert() rather than
checking every chunk number against both values. There's no need to
check that relationship in production builds because it's not a
function of whether on-disk corruption is present; it's just a
question of whether the code does the right math.

Also, have toast_fetch_datum_slice() use ereport(ERROR) rather than
elog(ERROR). Commit fd6ec93bf890314ac694dc8a7f3c45702ecc1bbd made
the two functions inconsistent with each other.

In toast_fetch_datum, rename two variables for better consistency with
toast_fetch_datum_slice. In toast_fetch_datum_slice, eliminate several
variables that are used only once, and rename length to slicelength
for clarity. In both functions, move some variables from the function
scope into the function's main loop.
---
 src/backend/access/common/detoast.c | 188 +++++++++++++---------------
 1 file changed, 86 insertions(+), 102 deletions(-)

diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c
index f752ac7bbc..ae7daa24de 100644
--- a/src/backend/access/common/detoast.c
+++ b/src/backend/access/common/detoast.c
@@ -25,7 +25,8 @@
 
 static struct varlena *toast_fetch_datum(struct varlena *attr);
 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
-											   int32 sliceoffset, int32 length);
+											   int32 sliceoffset,
+											   int32 slicelength);
 static struct varlena *toast_decompress_datum(struct varlena *attr);
 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
 
@@ -331,14 +332,9 @@ toast_fetch_datum(struct varlena *attr)
 	TupleDesc	toasttupDesc;
 	struct varlena *result;
 	struct varatt_external toast_pointer;
-	int32		ressize;
-	int32		residx,
-				nextidx;
-	int32		numchunks;
-	Pointer		chunk;
-	bool		isnull;
-	char	   *chunkdata;
-	int32		chunksize;
+	int32		attrsize;
+	int32		nextidx;
+	int32		totalchunks;
 	int			num_indexes;
 	int			validIndex;
 	SnapshotData SnapshotToast;
@@ -349,15 +345,15 @@ toast_fetch_datum(struct varlena *attr)
 	/* Must copy to access aligned fields */
 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
 
-	ressize = toast_pointer.va_extsize;
-	numchunks = ((ressize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
+	attrsize = toast_pointer.va_extsize;
+	totalchunks = ((attrsize - 1) / TOAST_MAX_CHUNK_SIZE) + 1;
 
-	result = (struct varlena *) palloc(ressize + VARHDRSZ);
+	result = (struct varlena *) palloc(attrsize + VARHDRSZ);
 
 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
-		SET_VARSIZE_COMPRESSED(result, ressize + VARHDRSZ);
+		SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
 	else
-		SET_VARSIZE(result, ressize + VARHDRSZ);
+		SET_VARSIZE(result, attrsize + VARHDRSZ);
 
 	/*
 	 * Open the toast relation and its indexes
@@ -393,6 +389,13 @@ toast_fetch_datum(struct varlena *attr)
 										   &SnapshotToast, 1, &toastkey);
 	while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
 	{
+		int32		residx;
+		Pointer		chunk;
+		bool		isnull;
+		char	   *chunkdata;
+		int32		chunksize;
+		int32		expected_size;
+
 		/*
 		 * Have a chunk, extract the sequence number and the data
 		 */
@@ -431,35 +434,22 @@ toast_fetch_datum(struct varlena *attr)
 									 residx, nextidx,
 									 toast_pointer.va_valueid,
 									 RelationGetRelationName(toastrel))));
-		if (residx < numchunks - 1)
-		{
-			if (chunksize != TOAST_MAX_CHUNK_SIZE)
-				ereport(ERROR,
-						(errcode(ERRCODE_DATA_CORRUPTED),
-						 errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s",
-										 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
-										 residx, numchunks,
-										 toast_pointer.va_valueid,
-										 RelationGetRelationName(toastrel))));
-		}
-		else if (residx == numchunks - 1)
-		{
-			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
-				ereport(ERROR,
-						(errcode(ERRCODE_DATA_CORRUPTED),
-						 errmsg_internal("unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s",
-										 chunksize,
-										 (int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
-										 residx,
-										 toast_pointer.va_valueid,
-										 RelationGetRelationName(toastrel))));
-		}
-		else
+		if (residx > totalchunks - 1)
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
 									 residx,
-									 0, numchunks - 1,
+									 0, totalchunks - 1,
+									 toast_pointer.va_valueid,
+									 RelationGetRelationName(toastrel))));
+		expected_size = residx < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
+			: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
+		if (chunksize != expected_size)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d for toast value %u in %s",
+									 chunksize, expected_size,
+									 residx,
 									 toast_pointer.va_valueid,
 									 RelationGetRelationName(toastrel))));
 
@@ -476,7 +466,7 @@ toast_fetch_datum(struct varlena *attr)
 	/*
 	 * Final checks that we successfully fetched the datum
 	 */
-	if (nextidx != numchunks)
+	if (nextidx != totalchunks)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATA_CORRUPTED),
 				 errmsg_internal("missing chunk number %d for toast value %u in %s",
@@ -506,7 +496,8 @@ toast_fetch_datum(struct varlena *attr)
  * ----------
  */
 static struct varlena *
-toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
+toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
+						int32 slicelength)
 {
 	Relation	toastrel;
 	Relation   *toastidxs;
@@ -518,20 +509,10 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	struct varlena *result;
 	struct varatt_external toast_pointer;
 	int32		attrsize;
-	int32		residx;
 	int32		nextidx;
-	int			numchunks;
 	int			startchunk;
 	int			endchunk;
-	int32		startoffset;
-	int32		endoffset;
 	int			totalchunks;
-	Pointer		chunk;
-	bool		isnull;
-	char	   *chunkdata;
-	int32		chunksize;
-	int32		chcpystrt;
-	int32		chcpyend;
 	int			num_indexes;
 	int			validIndex;
 	SnapshotData SnapshotToast;
@@ -555,7 +536,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	if (sliceoffset >= attrsize)
 	{
 		sliceoffset = 0;
-		length = 0;
+		slicelength = 0;
 	}
 
 	/*
@@ -563,28 +544,25 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	 * rawsize tracking amount of raw data, which is stored at the beginning
 	 * as an int32 value).
 	 */
-	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && length > 0)
-		length = length + sizeof(int32);
+	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
+		slicelength = slicelength + sizeof(int32);
 
-	if (((sliceoffset + length) > attrsize) || length < 0)
-		length = attrsize - sliceoffset;
+	if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
+		slicelength = attrsize - sliceoffset;
 
-	result = (struct varlena *) palloc(length + VARHDRSZ);
+	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
 
 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
-		SET_VARSIZE_COMPRESSED(result, length + VARHDRSZ);
+		SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
 	else
-		SET_VARSIZE(result, length + VARHDRSZ);
+		SET_VARSIZE(result, slicelength + VARHDRSZ);
 
-	if (length == 0)
+	if (slicelength == 0)
 		return result;			/* Can save a lot of work at this point! */
 
 	startchunk = sliceoffset / TOAST_MAX_CHUNK_SIZE;
-	endchunk = (sliceoffset + length - 1) / TOAST_MAX_CHUNK_SIZE;
-	numchunks = (endchunk - startchunk) + 1;
-
-	startoffset = sliceoffset % TOAST_MAX_CHUNK_SIZE;
-	endoffset = (sliceoffset + length - 1) % TOAST_MAX_CHUNK_SIZE;
+	endchunk = (sliceoffset + slicelength - 1) / TOAST_MAX_CHUNK_SIZE;
+	Assert(endchunk <= totalchunks);
 
 	/*
 	 * Open the toast relation and its indexes
@@ -610,7 +588,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	/*
 	 * Use equality condition for one chunk, a range condition otherwise:
 	 */
-	if (numchunks == 1)
+	if (startchunk == endchunk)
 	{
 		ScanKeyInit(&toastkey[1],
 					(AttrNumber) 2,
@@ -642,6 +620,15 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 										   &SnapshotToast, nscankeys, toastkey);
 	while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
 	{
+		int32		residx;
+		Pointer		chunk;
+		bool		isnull;
+		char	   *chunkdata;
+		int32		chunksize;
+		int32		expected_size;
+		int32		chcpystrt;
+		int32		chcpyend;
+
 		/*
 		 * Have a chunk, extract the sequence number and the data
 		 */
@@ -673,36 +660,31 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 		/*
 		 * Some checks on the data we've found
 		 */
-		if ((residx != nextidx) || (residx > endchunk) || (residx < startchunk))
-			elog(ERROR, "unexpected chunk number %d (expected %d) for toast value %u in %s",
-				 residx, nextidx,
-				 toast_pointer.va_valueid,
-				 RelationGetRelationName(toastrel));
-		if (residx < totalchunks - 1)
-		{
-			if (chunksize != TOAST_MAX_CHUNK_SIZE)
-				elog(ERROR, "unexpected chunk size %d (expected %d) in chunk %d of %d for toast value %u in %s when fetching slice",
-					 chunksize, (int) TOAST_MAX_CHUNK_SIZE,
-					 residx, totalchunks,
-					 toast_pointer.va_valueid,
-					 RelationGetRelationName(toastrel));
-		}
-		else if (residx == totalchunks - 1)
-		{
-			if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
-				elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u in %s when fetching slice",
-					 chunksize,
-					 (int) (attrsize - residx * TOAST_MAX_CHUNK_SIZE),
-					 residx,
-					 toast_pointer.va_valueid,
-					 RelationGetRelationName(toastrel));
-		}
-		else
-			elog(ERROR, "unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
-				 residx,
-				 0, totalchunks - 1,
-				 toast_pointer.va_valueid,
-				 RelationGetRelationName(toastrel));
+		if (residx != nextidx)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg_internal("unexpected chunk number %d (expected %d) for toast value %u in %s",
+									 residx, nextidx,
+									 toast_pointer.va_valueid,
+									 RelationGetRelationName(toastrel))));
+		if (residx > endchunk)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg_internal("unexpected chunk number %d (out of range %d..%d) for toast value %u in %s",
+									 residx,
+									 startchunk, endchunk,
+									 toast_pointer.va_valueid,
+									 RelationGetRelationName(toastrel))));
+		expected_size = residx < totalchunks - 1 ? TOAST_MAX_CHUNK_SIZE
+			: attrsize - ((totalchunks - 1) * TOAST_MAX_CHUNK_SIZE);
+		if (chunksize != expected_size)
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg_internal("unexpected chunk size %d (expected %d) in chunk %d for toast value %u in %s",
+									 chunksize, expected_size,
+									 residx,
+									 toast_pointer.va_valueid,
+									 RelationGetRelationName(toastrel))));
 
 		/*
 		 * Copy the data into proper place in our result
@@ -710,9 +692,9 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 		chcpystrt = 0;
 		chcpyend = chunksize - 1;
 		if (residx == startchunk)
-			chcpystrt = startoffset;
+			chcpystrt = sliceoffset % TOAST_MAX_CHUNK_SIZE;
 		if (residx == endchunk)
-			chcpyend = endoffset;
+			chcpyend = (sliceoffset + slicelength - 1) % TOAST_MAX_CHUNK_SIZE;
 
 		memcpy(VARDATA(result) +
 			   (residx * TOAST_MAX_CHUNK_SIZE - sliceoffset) + chcpystrt,
@@ -726,10 +708,12 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
 	 * Final checks that we successfully fetched the datum
 	 */
 	if (nextidx != (endchunk + 1))
-		elog(ERROR, "missing chunk number %d for toast value %u in %s",
-			 nextidx,
-			 toast_pointer.va_valueid,
-			 RelationGetRelationName(toastrel));
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg_internal("missing chunk number %d for toast value %u in %s",
+								 nextidx,
+								 toast_pointer.va_valueid,
+								 RelationGetRelationName(toastrel))));
 
 	/*
 	 * End scan and close relations
-- 
2.17.2 (Apple Git-113)

