From 4857979a461b14fcc15316e0047cfd7c9401d1f6 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Fri, 19 Mar 2021 14:13:19 -0400
Subject: [PATCH v5] Fix bug in heap space management that was overly cautious
 about fillfactor

Previously, if there were leftover ununused line pointers in a page,
a table with a low fill factor would not be available for new tuples
that were large enough to exceed fillfactor, even though the page
contained no tuples at all. This lead to unnecessary relation
extension.

Fix by allowing some slack space equal to 1/8 the maximum space that
could be taken up by line pointers. This is somewhat arbitrary, but
should allow many cases to succeed where they didn't before.

Per report from Floris van Nee.
---
 src/backend/access/heap/hio.c        | 43 ++++++++++++++++++++--------
 src/test/regress/expected/insert.out | 22 ++++++++++++++
 src/test/regress/sql/insert.sql      | 23 +++++++++++++++
 3 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 75223c9bea..2a618469db 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -335,11 +335,24 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
 	Size		pageFreeSpace = 0,
-				saveFreeSpace = 0;
+				saveFreeSpace = 0,
+				targetFreeSpace = 0;
 	BlockNumber targetBlock,
 				otherBlock;
 	bool		needLock;
 
+	/*
+	 * Since pages without tuples as a result of vacuuming can still have some
+	 * space occupied by unused item pointers, we consider pages "empty" for
+	 * FSM requests when the unavailable space is equivalent to some number of
+	 * line pointers. We use 1/8 of MaxHeapTuplesPerPage for the threshold
+	 * calculation, which is somewhat arbitrary, but should prevent most
+	 * unnecessary relation extensions due to not finding "empty" pages while
+	 * inserting large tuples into tables with low fillfactors.
+	 */
+	const Size	maxPaddedFsmRequest = MaxHeapTupleSize -
+	(MaxHeapTuplesPerPage / 8 * sizeof(ItemIdData));
+
 	len = MAXALIGN(len);		/* be conservative */
 
 	/* Bulk insert is not supported for updates, only inserts. */
@@ -358,6 +371,18 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
 												   HEAP_DEFAULT_FILLFACTOR);
 
+	/*
+	 * When the free space to be requested from the FSM is greater than
+	 * maxPaddedFsmRequest, this is considered equivalent to requesting
+	 * insertion on an "empty" page. If the tuple itself is larger than this,
+	 * we request its full length, otherwise we use maxPaddedFsmRequest as a
+	 * proxy for "empty". This prevents unnecessarily extending the relation.
+	 */
+	if (len + saveFreeSpace > maxPaddedFsmRequest)
+		targetFreeSpace = Max(len, maxPaddedFsmRequest);
+	else
+		targetFreeSpace = len + saveFreeSpace;
+
 	if (otherBuffer != InvalidBuffer)
 		otherBlock = BufferGetBlockNumber(otherBuffer);
 	else
@@ -376,13 +401,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 * When use_fsm is false, we either put the tuple onto the existing target
 	 * page or extend the relation.
 	 */
-	if (len + saveFreeSpace > MaxHeapTupleSize)
-	{
-		/* can't fit, don't bother asking FSM */
-		targetBlock = InvalidBlockNumber;
-		use_fsm = false;
-	}
-	else if (bistate && bistate->current_buf != InvalidBuffer)
+	if (bistate && bistate->current_buf != InvalidBuffer)
 		targetBlock = BufferGetBlockNumber(bistate->current_buf);
 	else
 		targetBlock = RelationGetTargetBlock(relation);
@@ -393,7 +412,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		 * We have no cached target page, so ask the FSM for an initial
 		 * target.
 		 */
-		targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+		targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
 	}
 
 	/*
@@ -517,7 +536,7 @@ loop:
 		}
 
 		pageFreeSpace = PageGetHeapFreeSpace(page);
-		if (len + saveFreeSpace <= pageFreeSpace)
+		if (targetFreeSpace <= pageFreeSpace)
 		{
 			/* use this page as future insert target, too */
 			RelationSetTargetBlock(relation, targetBlock);
@@ -550,7 +569,7 @@ loop:
 		targetBlock = RecordAndGetPageWithFreeSpace(relation,
 													targetBlock,
 													pageFreeSpace,
-													len + saveFreeSpace);
+													targetFreeSpace);
 	}
 
 	/*
@@ -582,7 +601,7 @@ loop:
 			 * Check if some other backend has extended a block for us while
 			 * we were waiting on the lock.
 			 */
-			targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace);
+			targetBlock = GetPageWithFreeSpace(relation, targetFreeSpace);
 
 			/*
 			 * If some other waiter has already extended the relation, we
diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out
index da50ee3b67..400a45f799 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -82,6 +82,28 @@ select col1, col2, char_length(col3) from inserttest;
 
 drop table inserttest;
 --
+-- test using FSM with large tuples and low fillfactor
+--
+CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
+ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
+-- simulate having a few line pointers on the page so that the
+-- page has free space slightly less than MaxHeapTupleSize
+INSERT INTO large_tuple_test (select 1, NULL);
+-- should still fit on the page
+INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
+SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
+ pg_size_pretty 
+----------------
+ 8192 bytes
+(1 row)
+
+-- add small record to the second page
+INSERT INTO large_tuple_test (select 3, NULL);
+-- now this tuple won't fit on the second page, but the insert should
+-- still succeed by extending the relation
+INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+DROP TABLE large_tuple_test;
+--
 -- check indirection (field/array assignment), cf bug #14265
 --
 -- these tests are aware that transformInsertStmt has 3 separate code paths
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index 963faa1614..560ccbc652 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -37,6 +37,29 @@ select col1, col2, char_length(col3) from inserttest;
 
 drop table inserttest;
 
+--
+-- test using FSM with large tuples and low fillfactor
+--
+CREATE TABLE large_tuple_test (a int, b text) WITH (fillfactor = 10);
+ALTER TABLE large_tuple_test ALTER COLUMN b SET STORAGE plain;
+
+-- simulate having a few line pointers on the page so that the
+-- page has free space slightly less than MaxHeapTupleSize
+INSERT INTO large_tuple_test (select 1, NULL);
+
+-- should still fit on the page
+INSERT INTO large_tuple_test (select 2, repeat('a', 1000));
+SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main'));
+
+-- add small record to the second page
+INSERT INTO large_tuple_test (select 3, NULL);
+
+-- now this tuple won't fit on the second page, but the insert should
+-- still succeed by extending the relation
+INSERT INTO large_tuple_test (select 4, repeat('a', 8126));
+
+DROP TABLE large_tuple_test;
+
 --
 -- check indirection (field/array assignment), cf bug #14265
 --
-- 
2.22.0

