On Sat, Jan 30, 2016 at 11:08 PM, Michael Paquier
<michael.paqu...@gmail.com> wrote:
> On Fri, Jan 29, 2016 at 9:13 PM, Michael Paquier wrote:
>> Well, to put it short, I am just trying to find a way to make the
>> backend skip unnecessary checkpoints on an idle system, which results
>> in the following WAL pattern if system is completely idle:
>> CHECKPOINT_ONLINE
>> RUNNING_XACTS
>> RUNNING_XACTS
>> [etc..]
>>
>> The thing is that I am lost with the meaning of this condition to
>> decide if a checkpoint should be skipped or not:
>>         if (prevPtr == ControlFile->checkPointCopy.redo &&
>>             prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
>>         {
>>             WALInsertLockRelease();
>>             LWLockRelease(CheckpointLock);
>>             return;
>>         }
>> As at least one standby snapshot is logged before the checkpoint
>> record, the redo position is never going to match the previous insert
>> LSN, so checkpoints will never be skipped if wal_level >= hot_standby.
>> Skipping such unnecessary checkpoints is what you would like to
>> address, no? Because that's what I would like to do here first. And
>> once we got that right, we could think about addressing the case where
>> WAL segments are forcibly archived for idle systems.
>
> I have put a bit more of brain power into that, and finished with the
> patch attached. A new field called chkpProgressLSN is attached to
> XLogCtl, which is to the current insert position of the checkpoint
> when wal_level <= archive, or the LSN position of the standby snapshot
> taken after a checkpoint. The bgwriter code is modified as well so as
> it uses this progress LSN and compares it with the current insert LSN
> to determine if a standby snapshot should be logged or not. On an
> instance of Postgres completely idle, no useless checkpoints, and no
> useless standby snapshots are generated anymore.

Attached is an additional patch that I have used for my tests (should
have sent that yesterday). This just shows up a couple of logs in the
bgwriter and around checkpoint to see in more details their activity
with not that much noise.
-- 
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ccef3f0..e51b97e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -8259,12 +8259,20 @@ CreateCheckPoint(int flags)
 	 * better if you don't need to keep two WAL segments around to recover the
 	 * checkpoint.
 	 */
+	elog(LOG, "prevPtr %X/%X, curInsert %X/%X, progress %X/%X, redo %X/%X",
+		 (uint32) (prevPtr >> 32), (uint32) prevPtr,
+		 (uint32) (curInsert >> 32), (uint32) curInsert,
+		 (uint32) (GetProgressRecPtr() >> 32), (uint32) GetProgressRecPtr(),
+		 (uint32) (ControlFile->checkPointCopy.redo >> 32),
+		 (uint32) ControlFile->checkPointCopy.redo);
 	if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
 				  CHECKPOINT_FORCE)) == 0)
 	{
+		elog(LOG, "Not a forced or shutdown checkpoint");
 		if (GetProgressRecPtr() == prevPtr &&
 			prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
 		{
+			elog(LOG, "Checkpoint is skipped");
 			WALInsertLockRelease();
 			LWLockRelease(CheckpointLock);
 			END_CRIT_SECTION();
@@ -8437,7 +8445,11 @@ CreateCheckPoint(int flags)
 	 * snapshots need to be logged, and skip them on idle systems.
 	 */
 	if (!shutdown && XLogStandbyInfoActive())
+	{
 		progress_lsn = LogStandbySnapshot();
+		elog(LOG, "snapshot taken by checkpoint %X/%X",
+			 (uint32) (progress_lsn >> 32), (uint32) progress_lsn);
+	}
 
 	START_CRIT_SECTION();
 
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 22374b0..d55e446 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -308,17 +308,25 @@ BackgroundWriterMain(void)
 		{
 			TimestampTz timeout = 0;
 			TimestampTz now = GetCurrentTimestamp();
+			XLogRecPtr	progress_lsn = GetProgressRecPtr();
+			XLogRecPtr	insert_lsn = GetInsertRecPtr();
 
 			timeout = TimestampTzPlusMilliseconds(last_snapshot_ts,
 												  LOG_SNAPSHOT_INTERVAL_MS);
 
+			elog(LOG, "bgwriter progress_lsn %X/%X, insert_lsn %X/%X",
+				 (uint32) (progress_lsn >> 32), (uint32) progress_lsn,
+				 (uint32) (insert_lsn >> 32), (uint32) insert_lsn);
 			/*
 			 * only log if enough time has passed.
 			 */
 			if (now >= timeout &&
-				GetProgressRecPtr() != GetInsertRecPtr())
+				progress_lsn != insert_lsn)
 			{
-				(void) LogStandbySnapshot();
+				XLogRecPtr lsn;
+				lsn = LogStandbySnapshot();
+				elog(LOG, "snapshot taken by bgwriter %X/%X",
+					 (uint32) (lsn >> 32), (uint32) lsn);
 				last_snapshot_ts = now;
 			}
 		}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a2846c4..ccef3f0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -527,6 +527,8 @@ typedef struct XLogCtlData
 	TransactionId ckptXid;
 	XLogRecPtr	asyncXactLSN;	/* LSN of newest async commit/abort */
 	XLogRecPtr	replicationSlotMinLSN;	/* oldest LSN needed by any slot */
+	XLogRecPtr	ckptProgressLSN;	/* LSN tracking progress of checkpoint
+									 * and standby snapshots */
 
 	XLogSegNo	lastRemovedSegNo;		/* latest removed/recycled XLOG
 										 * segment */
@@ -6314,6 +6316,7 @@ StartupXLOG(void)
 					 checkPoint.newestCommitTsXid);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptProgressLSN = checkPoint.redo;
 
 	/*
 	 * Initialize replication slots, before there's a chance to remove
@@ -7869,6 +7872,22 @@ GetFlushRecPtr(void)
 }
 
 /*
+ * GetProgressRecPtr -- Returns the reference position of last checkpoint,
+ * taking into account standby snapshots generated by checkpoints.
+ */
+XLogRecPtr
+GetProgressRecPtr(void)
+{
+	XLogRecPtr	progress_lsn;
+
+	SpinLockAcquire(&XLogCtl->info_lck);
+	progress_lsn = XLogCtl->ckptProgressLSN;
+	SpinLockRelease(&XLogCtl->info_lck);
+
+	return progress_lsn;
+}
+
+/*
  * Get the time of the last xlog segment switch
  */
 pg_time_t
@@ -8133,6 +8152,7 @@ CreateCheckPoint(int flags)
 	XLogRecPtr	PriorRedoPtr;
 	XLogRecPtr	curInsert;
 	XLogRecPtr	prevPtr;
+	XLogRecPtr	progress_lsn;
 	VirtualTransactionId *vxids;
 	int			nvxids;
 
@@ -8214,10 +8234,13 @@ CreateCheckPoint(int flags)
 
 	/*
 	 * We must block concurrent insertions while examining insert state to
-	 * determine the checkpoint REDO pointer.
+	 * determine the checkpoint REDO pointer. The progress LSN of this
+	 * to-be-created checkpoint is for now initialized as the current insert
+	 * position in WAL. This will get updated later on depending on if
+	 * a standby snapshot is logged.
 	 */
 	WALInsertLockAcquireExclusive();
-	curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
+	curInsert = progress_lsn = XLogBytePosToRecPtr(Insert->CurrBytePos);
 	prevPtr = XLogBytePosToRecPtr(Insert->PrevBytePos);
 
 	/*
@@ -8239,7 +8262,7 @@ CreateCheckPoint(int flags)
 	if ((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
 				  CHECKPOINT_FORCE)) == 0)
 	{
-		if (prevPtr == ControlFile->checkPointCopy.redo &&
+		if (GetProgressRecPtr() == prevPtr &&
 			prevPtr / XLOG_SEG_SIZE == curInsert / XLOG_SEG_SIZE)
 		{
 			WALInsertLockRelease();
@@ -8406,9 +8429,15 @@ CreateCheckPoint(int flags)
 	 *
 	 * If we are shutting down, or Startup process is completing crash
 	 * recovery we don't need to write running xact data.
+	 *
+	 * progress_lsn is aimed at tracking the WAL progress that happens
+	 * since this checkpoint. If a standby snapshot is logged, its record
+	 * postion need to be taken into account in the progress LSN position
+	 * that is used to evaluate if checkpoints are necessary or standby
+	 * snapshots need to be logged, and skip them on idle systems.
 	 */
 	if (!shutdown && XLogStandbyInfoActive())
-		LogStandbySnapshot();
+		progress_lsn = LogStandbySnapshot();
 
 	START_CRIT_SECTION();
 
@@ -8478,10 +8507,15 @@ CreateCheckPoint(int flags)
 	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
-	/* Update shared-memory copy of checkpoint XID/epoch */
+	/*
+	 * Update shared-memory copy of checkpoint XID/epoch and the LSN
+	 * record tracking progress of checkpoint and standby snapshot
+	 * activity.
+	 */
 	SpinLockAcquire(&XLogCtl->info_lck);
 	XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
 	XLogCtl->ckptXid = checkPoint.nextXid;
+	XLogCtl->ckptProgressLSN = progress_lsn;
 	SpinLockRelease(&XLogCtl->info_lck);
 
 	/*
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 010b5fc..22374b0 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -78,12 +78,10 @@ int			BgWriterDelay = 200;
 #define LOG_SNAPSHOT_INTERVAL_MS 15000
 
 /*
- * LSN and timestamp at which we last issued a LogStandbySnapshot(), to avoid
- * doing so too often or repeatedly if there has been no other write activity
- * in the system.
+ * Timestamp at which we last issued a LogStandbySnapshot(), to avoid doing
+ * so too often.
  */
 static TimestampTz last_snapshot_ts;
-static XLogRecPtr last_snapshot_lsn = InvalidXLogRecPtr;
 
 /*
  * Flags set by interrupt handlers for later service in the main loop.
@@ -301,8 +299,8 @@ BackgroundWriterMain(void)
 		 * check whether there has been any WAL inserted since the last time
 		 * we've logged a running xacts.
 		 *
-		 * We do this logging in the bgwriter as its the only process thats
-		 * run regularly and returns to its mainloop all the time. E.g.
+		 * We do this logging in the bgwriter as it is the only process that
+		 * is run regularly and returns to its mainloop all the time. E.g.
 		 * Checkpointer, when active, is barely ever in its mainloop and thus
 		 * makes it hard to log regularly.
 		 */
@@ -315,13 +313,12 @@ BackgroundWriterMain(void)
 												  LOG_SNAPSHOT_INTERVAL_MS);
 
 			/*
-			 * only log if enough time has passed and some xlog record has
-			 * been inserted.
+			 * only log if enough time has passed.
 			 */
 			if (now >= timeout &&
-				last_snapshot_lsn != GetXLogInsertRecPtr())
+				GetProgressRecPtr() != GetInsertRecPtr())
 			{
-				last_snapshot_lsn = LogStandbySnapshot();
+				(void) LogStandbySnapshot();
 				last_snapshot_ts = now;
 			}
 		}
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index ecd30ce..41cb512 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -261,6 +261,7 @@ extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 extern XLogRecPtr GetRedoRecPtr(void);
 extern XLogRecPtr GetInsertRecPtr(void);
 extern XLogRecPtr GetFlushRecPtr(void);
+extern XLogRecPtr GetProgressRecPtr(void);
 extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch);
 extern void RemovePromoteSignalFiles(void);
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to