From e1b47ed47508d2eeb3e3a2ca2424edb56eae4dec Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Thu, 27 Feb 2025 12:59:51 +0300
Subject: [PATCH v2] AbsorbSyncRequests incrementally, instead of doing it for
 all requests at once.

---
 src/backend/postmaster/checkpointer.c | 80 +++++++++++++++++++--------
 1 file changed, 57 insertions(+), 23 deletions(-)

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 7acbbd3e26..bdb0de4176 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1321,42 +1321,76 @@ CompactCheckpointerRequestQueue(void)
 void
 AbsorbSyncRequests(void)
 {
-	CheckpointerRequest *requests = NULL;
-	CheckpointerRequest *request;
-	int			n;
+	CheckpointerRequest	   *requests = NULL;
+	int						num_requests,
+							max_requests;
 
 	if (!AmCheckpointerProcess())
 		return;
 
 	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
-
+	num_requests = CheckpointerShmem->num_requests;
 	/*
-	 * We try to avoid holding the lock for a long time by copying the request
-	 * array, and processing the requests after releasing the lock.
-	 *
-	 * Once we have cleared the requests from shared memory, we have to PANIC
-	 * if we then fail to absorb them (eg, because our hashtable runs out of
-	 * memory).  This is because the system cannot run safely if we are unable
-	 * to fsync what we have been told to fsync.  Fortunately, the hashtable
-	 * is so small that the problem is quite unlikely to arise in practice.
+	 * Note: this can be chosen arbitrarily, stick for 1Gb for now.
 	 */
-	n = CheckpointerShmem->num_requests;
-	if (n > 0)
+	max_requests = MaxAllocSize / sizeof(CheckpointerRequest);
+
+	for (int i = 0; i < num_requests; i += max_requests)
 	{
-		requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
-		memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
-	}
+		Size	n = (num_requests - i >= max_requests) ? max_requests :
+														 num_requests - i;
 
-	START_CRIT_SECTION();
+		if (!requests)
+			requests = palloc(n * sizeof(CheckpointerRequest));
 
-	CheckpointerShmem->num_requests = 0;
+		memcpy(requests, CheckpointerShmem->requests + i,
+			   n * sizeof(CheckpointerRequest));
 
-	LWLockRelease(CheckpointerCommLock);
+		LWLockRelease(CheckpointerCommLock);
+
+		START_CRIT_SECTION();
+
+		/*
+		 * We try to avoid holding the lock for a long time by copying the
+		 * request array, and processing the requests after releasing the lock.
+		 *
+		 * Once we have cleared the requests from shared memory, we have to
+		 * PANIC if we then fail to absorb them (eg, because our hashtable runs
+		 * out of memory).  This is because the system cannot run safely if we
+		 * are unable to fsync what we have been told to fsync.  Fortunately,
+		 * the hashtable is so small that the problem is quite unlikely to arise
+		 * in practice.
+		 */
+		for (int j = 0; j < n; j++)
+			RememberSyncRequest(&requests[j].ftag, requests[j].type);
 
-	for (request = requests; n > 0; request++, n--)
-		RememberSyncRequest(&request->ftag, request->type);
+		END_CRIT_SECTION();
 
-	END_CRIT_SECTION();
+		LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
+	}
+
+	if (CheckpointerShmem->num_requests >= num_requests)
+	{
+		/*
+		 * All the requests that were added to the queue between the release and
+		 * the lock acquisition must be processed.
+		*/
+		Size	n = CheckpointerShmem->num_requests - num_requests;
+
+		memmove(CheckpointerShmem->requests,
+				CheckpointerShmem->requests + num_requests,
+				n * sizeof(CheckpointerRequest));
+		CheckpointerShmem->num_requests = n;
+	}
+	else
+	{
+		/*
+		 * CompactCheckpointerRequestQueue was called, found some duplicates and
+		 * remove them.
+		 */
+	}
+
+	LWLockRelease(CheckpointerCommLock);
 
 	if (requests)
 		pfree(requests);
-- 
2.48.1