On 03/31/2014 09:08 PM, Robert Haas wrote:
On Wed, Mar 26, 2014 at 9:45 PM, Peter Geoghegan <p...@heroku.com> wrote:
On Wed, Nov 27, 2013 at 9:10 AM, Noah Misch <n...@leadboat.com> wrote:
The threat is that rounding the read size up to the next MAXALIGN would cross
into an unreadable memory page, resulting in a SIGSEGV.  Every palloc chunk
has MAXALIGN'd size under the hood, so the excess read of "toDelete" cannot
cause a SIGSEGV.  For a stack variable, it depends on the ABI.  I'm not aware
of an ABI where the four bytes past the end of this stack variable could be
unreadable, which is not to claim I'm well-read on the topic.  We should fix
this in due course on code hygiene grounds, but I would not back-patch it.

Attached patch silences the "Invalid read of size n" complaints of
Valgrind. I agree with your general thoughts around backpatching. Note
that the patch addresses a distinct complaint from Kevin's, as
Valgrind doesn't take issue with the invalid reads past the end of
spgxlogPickSplit variables on the stack.

Is the needless zeroing this patch introduces apt to cause a
performance problem?

This function is actually pretty wacky.  If we're stuffing bytes with
undefined contents into the WAL record, maybe the answer isn't to
force the contents of those bytes to be defined, but rather to elide
them from the WAL record.

Agreed. I propose the attached, which removes the MAXALIGNs. It's not suitable for backpatching, though, as it changes the format of the WAL record.

- Heikki
diff --git a/src/backend/access/spgist/spgdoinsert.c b/src/backend/access/spgist/spgdoinsert.c
index 48f32cd..ff403ef 100644
--- a/src/backend/access/spgist/spgdoinsert.c
+++ b/src/backend/access/spgist/spgdoinsert.c
@@ -533,9 +533,9 @@ moveLeafs(Relation index, SpGistState *state,
 	{
 		XLogRecPtr	recptr;
 
-		ACCEPT_RDATA_DATA(&xlrec, MAXALIGN(sizeof(xlrec)), 0);
-		ACCEPT_RDATA_DATA(toDelete, MAXALIGN(sizeof(OffsetNumber) * nDelete), 1);
-		ACCEPT_RDATA_DATA(toInsert, MAXALIGN(sizeof(OffsetNumber) * nInsert), 2);
+		ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
+		ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
+		ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
 		ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
 		ACCEPT_RDATA_BUFFER(current->buffer, 4);
 		ACCEPT_RDATA_BUFFER(nbuf, 5);
@@ -1116,7 +1116,7 @@ doPickSplit(Relation index, SpGistState *state,
 
 	leafdata = leafptr = (char *) palloc(totalLeafSizes);
 
-	ACCEPT_RDATA_DATA(&xlrec, MAXALIGN(sizeof(xlrec)), 0);
+	ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
 	ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, 1);
 	nRdata = 2;
 
@@ -1152,7 +1152,7 @@ doPickSplit(Relation index, SpGistState *state,
 		{
 			xlrec.nDelete = nToDelete;
 			ACCEPT_RDATA_DATA(toDelete,
-							  MAXALIGN(sizeof(OffsetNumber) * nToDelete),
+							  sizeof(OffsetNumber) * nToDelete,
 							  nRdata);
 			nRdata++;
 			ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
@@ -1251,13 +1251,9 @@ doPickSplit(Relation index, SpGistState *state,
 	}
 
 	xlrec.nInsert = nToInsert;
-	ACCEPT_RDATA_DATA(toInsert,
-					  MAXALIGN(sizeof(OffsetNumber) * nToInsert),
-					  nRdata);
+	ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
 	nRdata++;
-	ACCEPT_RDATA_DATA(leafPageSelect,
-					  MAXALIGN(sizeof(uint8) * nToInsert),
-					  nRdata);
+	ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
 	nRdata++;
 	ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
 	nRdata++;
diff --git a/src/backend/access/spgist/spgxlog.c b/src/backend/access/spgist/spgxlog.c
index 1689324..cd6a4b2 100644
--- a/src/backend/access/spgist/spgxlog.c
+++ b/src/backend/access/spgist/spgxlog.c
@@ -205,7 +205,7 @@ static void
 spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
 {
 	char	   *ptr = XLogRecGetData(record);
-	spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
+	spgxlogMoveLeafs *xldata;
 	SpGistState state;
 	OffsetNumber *toDelete;
 	OffsetNumber *toInsert;
@@ -213,18 +213,19 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
 	Buffer		buffer;
 	Page		page;
 
-	fillFakeState(&state, xldata->stateSrc);
-
+	xldata = (spgxlogMoveLeafs *) ptr;
 	nInsert = xldata->replaceDead ? 1 : xldata->nMoves + 1;
 
-	ptr += MAXALIGN(sizeof(spgxlogMoveLeafs));
+	ptr += SizeOfSpgxlogMoveLeafs;
 	toDelete = (OffsetNumber *) ptr;
-	ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nMoves);
+	ptr += sizeof(OffsetNumber) * xldata->nMoves;
 	toInsert = (OffsetNumber *) ptr;
-	ptr += MAXALIGN(sizeof(OffsetNumber) * nInsert);
+	ptr += sizeof(OffsetNumber) * nInsert;
 
 	/* now ptr points to the list of leaf tuples */
 
+	fillFakeState(&state, xldata->stateSrc);
+
 	/*
 	 * In normal operation we would have all three pages (source, dest, and
 	 * parent) locked simultaneously; but in WAL replay it should be safe to
@@ -252,10 +253,16 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
 
 				for (i = 0; i < nInsert; i++)
 				{
-					SpGistLeafTuple lt = (SpGistLeafTuple) ptr;
+					SpGistLeafTupleData lt;
 
-					addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]);
-					ptr += lt->size;
+					/*
+					 * the tuples are not aligned, so must copy to access
+					 * the size field.
+					 */
+					memcpy(&lt, ptr, sizeof(SpGistLeafTupleData));
+
+					addOrReplaceTuple(page, (Item) ptr, lt.size, toInsert[i]);
+					ptr += lt.size;
 				}
 
 				PageSetLSN(page, lsn);
@@ -586,7 +593,7 @@ static void
 spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 {
 	char	   *ptr = XLogRecGetData(record);
-	spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
+	spgxlogPickSplit *xldata;
 	SpGistInnerTuple innerTuple;
 	SpGistState state;
 	OffsetNumber *toDelete;
@@ -600,20 +607,21 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 	int			bbi;
 	int			i;
 
-	fillFakeState(&state, xldata->stateSrc);
-
-	ptr += MAXALIGN(sizeof(spgxlogPickSplit));
+	xldata = (spgxlogPickSplit *) ptr;
+	ptr += SizeOfSpgxlogPickSplit;
 	innerTuple = (SpGistInnerTuple) ptr;
 	ptr += innerTuple->size;
 	toDelete = (OffsetNumber *) ptr;
-	ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nDelete);
+	ptr += sizeof(OffsetNumber) * xldata->nDelete;
 	toInsert = (OffsetNumber *) ptr;
-	ptr += MAXALIGN(sizeof(OffsetNumber) * xldata->nInsert);
+	ptr += sizeof(OffsetNumber) * xldata->nInsert;
 	leafPageSelect = (uint8 *) ptr;
-	ptr += MAXALIGN(sizeof(uint8) * xldata->nInsert);
+	ptr += sizeof(uint8) * xldata->nInsert;
 
 	/* now ptr points to the list of leaf tuples */
 
+	fillFakeState(&state, xldata->stateSrc);
+
 	/*
 	 * It's a bit tricky to identify which pages have been handled as
 	 * full-page images, so we explicitly count each referenced buffer.
@@ -735,15 +743,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
 	/* restore leaf tuples to src and/or dest page */
 	for (i = 0; i < xldata->nInsert; i++)
 	{
-		SpGistLeafTuple lt = (SpGistLeafTuple) ptr;
+		SpGistLeafTupleData lt;
 
-		ptr += lt->size;
+		/* the tuples are not aligned, so must copy to access the size field. */
+		memcpy(&lt, ptr, sizeof(SpGistLeafTupleData));
 
 		page = leafPageSelect[i] ? destPage : srcPage;
-		if (page == NULL)
-			continue;			/* no need to touch this page */
+		if (page != NULL)
+			addOrReplaceTuple(page, (Item) ptr, lt.size, toInsert[i]);
 
-		addOrReplaceTuple(page, (Item) lt, lt->size, toInsert[i]);
+		ptr += lt.size;
 	}
 
 	/* Now update src and dest page LSNs if needed */
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h
index 8e10ab2..75f898d 100644
--- a/src/include/access/spgist_private.h
+++ b/src/include/access/spgist_private.h
@@ -449,9 +449,9 @@ typedef struct spgxlogMoveLeafs
 	 * data follows:
 	 *		array of deleted tuple numbers, length nMoves
 	 *		array of inserted tuple numbers, length nMoves + 1 or 1
-	 *		list of leaf tuples, length nMoves + 1 or 1 (must be maxaligned)
-	 * the tuple number arrays are padded to maxalign boundaries so that the
-	 * leaf tuples will be suitably aligned
+	 *		list of leaf tuples, length nMoves + 1 or 1
+	 *
+	 * Note: the leaf tuples are not aligned!
 	 *
 	 * Note: if replaceDead is true then there is only one inserted tuple
 	 * number and only one leaf tuple in the data, because we are not copying
@@ -463,8 +463,11 @@ typedef struct spgxlogMoveLeafs
 	 *		Parent page
 	 *----------
 	 */
+	OffsetNumber offsets[1];	/* variable length */
 } spgxlogMoveLeafs;
 
+#define SizeOfSpgxlogMoveLeafs	offsetof(spgxlogMoveLeafs, offsets)
+
 typedef struct spgxlogAddNode
 {
 	RelFileNode node;
@@ -535,9 +538,9 @@ typedef struct spgxlogPickSplit
 	 *		array of deleted tuple numbers, length nDelete
 	 *		array of inserted tuple numbers, length nInsert
 	 *		array of page selector bytes for inserted tuples, length nInsert
-	 *		list of leaf tuples, length nInsert (must be maxaligned)
-	 * the tuple number and page selector arrays are padded to maxalign
-	 * boundaries so that the leaf tuples will be suitably aligned
+	 *		list of leaf tuples, length nInsert
+	 *
+	 * Note: the leaf tuples are not aligned!
 	 *
 	 * Buffer references in the rdata array are:
 	 *		Src page (only if not root and not being init'd)
@@ -546,8 +549,11 @@ typedef struct spgxlogPickSplit
 	 *		Parent page (if any; could be same as Inner)
 	 *----------
 	 */
+	SpGistInnerTuple innerTuple;	/* variable length */
 } spgxlogPickSplit;
 
+#define SizeOfSpgxlogPickSplit offsetof(spgxlogPickSplit, innerTuple)
+
 typedef struct spgxlogVacuumLeaf
 {
 	RelFileNode node;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to