Hi,

It seems to me that we could easily reclaim a few more dead tuples in a vacuum by recalculating OldestXmin every now and then. In a large table with a constant stream of updates/deletes and concurrent vacuums, this could make a big difference.

With the attached patch, OldestXmin is recalculated in a vacuum every 100 pages. That's a fairly arbitrary number, but it feels like a good one to me.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.82
diff -c -r1.82 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c	5 Jan 2007 22:19:27 -0000	1.82
--- src/backend/commands/vacuumlazy.c	16 Jan 2007 11:02:35 -0000
***************
*** 66,71 ****
--- 66,73 ----
  #define REL_TRUNCATE_MINIMUM	1000
  #define REL_TRUNCATE_FRACTION	16
  
+ /* OldestXmin is recalculated every OLDEST_XMIN_REFRESH_INTERVAL pages */
+ #define OLDEST_XMIN_REFRESH_INTERVAL 100
  
  typedef struct LVRelStats
  {
***************
*** 256,261 ****
--- 258,270 ----
  
  		vacuum_delay_point();
  
+ 		/* Get a new OldestXmin every OLDEST_XMIN_REFRESH_INTERVAL pages
+ 		 * so that we get to reclaim a little bit more dead tuples in a 
+ 		 * long-running vacuum.
+ 		 */
+ 		if (blkno % OLDEST_XMIN_REFRESH_INTERVAL == (OLDEST_XMIN_REFRESH_INTERVAL - 1))
+ 			OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
+ 
  		/*
  		 * If we are close to overrunning the available space for dead-tuple
  		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
Index: src/backend/storage/ipc/procarray.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/storage/ipc/procarray.c,v
retrieving revision 1.20
diff -c -r1.20 procarray.c
*** src/backend/storage/ipc/procarray.c	5 Jan 2007 22:19:38 -0000	1.20
--- src/backend/storage/ipc/procarray.c	16 Jan 2007 10:52:10 -0000
***************
*** 416,426 ****
  	/*
  	 * Normally we start the min() calculation with our own XID.  But if
  	 * called by checkpointer, we will not be inside a transaction, so use
! 	 * next XID as starting point for min() calculation.  (Note that if there
! 	 * are no xacts running at all, that will be the subtrans truncation
! 	 * point!)
  	 */
! 	if (IsTransactionState())
  		result = GetTopTransactionId();
  	else
  		result = ReadNewTransactionId();
--- 416,429 ----
  	/*
  	 * Normally we start the min() calculation with our own XID.  But if
  	 * called by checkpointer, we will not be inside a transaction, so use
! 	 * next XID as starting point for min() calculation.  We also don't
! 	 * include our own transaction if ignoreVacuum is true and we're a
! 	 * vacuum process ourselves.
! 	 *
! 	 * (Note that if there are no xacts running at all, that will be the
! 	 * subtrans truncation point!)
  	 */
! 	if (IsTransactionState() && !(ignoreVacuum && MyProc->inVacuum))
  		result = GetTopTransactionId();
  	else
  		result = ReadNewTransactionId();
---------------------------(end of broadcast)---------------------------
TIP 7: You can help support the PostgreSQL project by donating at

                http://www.postgresql.org/about/donate

Reply via email to