On Sat, 2005-01-01 at 17:47, Simon Riggs wrote:
> On Sat, 2005-01-01 at 17:01, Bruce Momjian wrote:
> > Simon Riggs wrote:
> >  
> > > Well, I think we're saying: it's not in 8.0 now, and we take our time to
> > > consider patches for 8.1 and accept the situation that the parameter
> > > names/meaning will change in next release.
> > 
> > I have no problem doing something for 8.0 if we can find something that
> > meets all the items I mentioned.
> > 
> > One idea would be to just remove bgwriter_percent.  Beta/RC users would
> > still have it in their postgresql.conf, but it is commented out so it
> > should be OK.  If they uncomment it their server would not start but we
> > could just tell testers to remove it.  I see that as better than having
> > conflicting parameters.
> 
> Can't say I like that at first thought. I'll think some more though...
> 
> > Another idea is to have bgwriter_percent be the percent of the buffer it
> > will scan.  
> 
> Hmmm....well that was my original suggestion (bg2.patch on 12 Dec)
> (...though with a bug, as Neil pointed out)
> 
> > We could default that to 50% or 100%, but we then need to
> > make sure all beta/RC users update their postgresql.conf with the new
> > default because the commented-out default will not be correct.
> 
> ...we just differ/ed on what the default should be...
> 
> > At this point I see these as our only two viable options, aside from
> > doing nothing.
> 
> > I realize our current behavior requires a full scan of the buffer cache,
> > but how often is the bgwriter_maxpages limit met?  If it is not, a full
> > scan is done anyway, right?  
> 
> Well, if you have a very heavy read workload then that would be a
> problem. I was more worried about concurrency in a heavy write
> situation, but I can see your point, and agree.
> 
> (Idea #1 still suffers from this, so we should rule it out...)
> 
> > It seems the only way to really add
> > functionality is to change bgwriter_percent to control how much of the
> > buffer is scanned.
> 
> OK. I think you've persuaded me on idea #2, if I understand you right:
> 
> bgwriter_percent = 50 (default)
> bgwriter_maxpages = 100 (default)
> 
> percent is the percentage of shared_buffers we scan, limited by maxpages.
> 
> (I'll code it up in a couple of hours when the kids are in bed)

Here's the basic patch - no changes to current default values or docs.

Not sure if this is still interesting or not...

-- 
Best Regards, Simon Riggs
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.182
diff -d -c -r1.182 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	24 Nov 2004 02:56:17 -0000	1.182
--- src/backend/storage/buffer/bufmgr.c	1 Jan 2005 21:03:16 -0000
***************
*** 682,717 ****
  	BufferDesc **dirty_buffers;
  	BufferTag  *buftags;
  	int			num_buffer_dirty;
  	int			i;
  
  	/* If either limit is zero then we are disabled from doing anything... */
  	if (percent == 0 || maxpages == 0)
  		return 0;
  
  	/*
! 	 * Get a list of all currently dirty buffers and how many there are.
  	 * We do not flush buffers that get dirtied after we started. They
! 	 * have to wait until the next checkpoint.
  	 */
! 	dirty_buffers = (BufferDesc **) palloc(NBuffers * sizeof(BufferDesc *));
! 	buftags = (BufferTag *) palloc(NBuffers * sizeof(BufferTag));
  
  	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- 	num_buffer_dirty = StrategyDirtyBufferList(dirty_buffers, buftags,
- 											   NBuffers);
  
! 	/*
! 	 * If called by the background writer, we are usually asked to only
! 	 * write out some portion of dirty buffers now, to prevent the IO
! 	 * storm at checkpoint time.
! 	 */
! 	if (percent > 0)
! 	{
! 		Assert(percent <= 100);
! 		num_buffer_dirty = (num_buffer_dirty * percent + 99) / 100;
! 	}
! 	if (maxpages > 0 && num_buffer_dirty > maxpages)
! 		num_buffer_dirty = maxpages;
  
  	/* Make sure we can handle the pin inside the loop */
  	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
--- 682,728 ----
  	BufferDesc **dirty_buffers;
  	BufferTag  *buftags;
  	int			num_buffer_dirty;
+     int         max_buffer_dirty = 1;
+     int         max_buffer_scan = 1;
  	int			i;
  
  	/* If either limit is zero then we are disabled from doing anything... */
  	if (percent == 0 || maxpages == 0)
  		return 0;
  
+     /* Set number of buffers we will scan from LRUs of buffer lists */
+     if (percent > 0 ) {
+     	Assert(percent <= 100);
+    	    max_buffer_scan = (NBuffers * percent + 99) / 100;
+     }
+ 
+     /* at checkpoint time we scan the whole buffer list */
+     if (percent < 0)
+     	max_buffer_scan = NBuffers;
+ 
+     if (maxpages < 0 || maxpages > NBuffers)
+     	max_buffer_dirty = NBuffers;
+     else
+         max_buffer_dirty = maxpages;
+ 
+     /* we cannot find more dirty buffers than we scan */
+     if (max_buffer_dirty > max_buffer_scan)
+         max_buffer_dirty = max_buffer_scan;
+ 
  	/*
! 	 * Get a list of dirty buffers to clean and how many there are.
  	 * We do not flush buffers that get dirtied after we started. They
! 	 * have to wait until the next call of this function
  	 */
! 	dirty_buffers = 
!          (BufferDesc **) palloc(max_buffer_dirty * sizeof(BufferDesc *));
! 	buftags = (BufferTag *) palloc(max_buffer_dirty * sizeof(BufferTag));
  
  	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
  
!    	num_buffer_dirty = StrategyDirtyBufferList(dirty_buffers, buftags,
! 											   max_buffer_dirty,
!                                                max_buffer_scan);
  
  	/* Make sure we can handle the pin inside the loop */
  	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
Index: src/backend/storage/buffer/freelist.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/buffer/freelist.c,v
retrieving revision 1.48
diff -d -c -r1.48 freelist.c
*** src/backend/storage/buffer/freelist.c	16 Sep 2004 16:58:31 -0000	1.48
--- src/backend/storage/buffer/freelist.c	1 Jan 2005 21:03:17 -0000
***************
*** 735,756 ****
   * StrategyDirtyBufferList
   *
   * Returns a list of dirty buffers, in priority order for writing.
-  * Note that the caller may choose not to write them all.
   *
   * The caller must beware of the possibility that a buffer is no longer dirty,
   * or even contains a different page, by the time he reaches it.  If it no
   * longer contains the same page it need not be written, even if it is (again)
   * dirty.
   *
!  * Buffer pointers are stored into buffers[], and corresponding tags into
!  * buftags[], both of size max_buffers.  The function returns the number of
!  * buffer IDs stored.
   */
  int
  StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
! 						int max_buffers)
  {
  	int			num_buffer_dirty = 0;
  	int			cdb_id_t1;
  	int			cdb_id_t2;
  	int			buf_id;
--- 735,757 ----
   * StrategyDirtyBufferList
   *
   * Returns a list of dirty buffers, in priority order for writing.
   *
   * The caller must beware of the possibility that a buffer is no longer dirty,
   * or even contains a different page, by the time he reaches it.  If it no
   * longer contains the same page it need not be written, even if it is (again)
   * dirty.
   *
!  * We scan the buffer lists T1 and T2 for at most max_buffer_scan buffers, 
!  * recording any dirty buffer pointers in buffers[], and corresponding tags into
!  * buftags[], both of size max_buffer_dirty. The function returns the number of
!  * dirty buffer IDs stored.
   */
  int
  StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
! 						int max_buffer_dirty, int max_buffer_scan)
  {
  	int			num_buffer_dirty = 0;
+ 	int			num_buffer_scan = 0;
  	int			cdb_id_t1;
  	int			cdb_id_t2;
  	int			buf_id;
***************
*** 779,790 ****
  					buffers[num_buffer_dirty] = buf;
  					buftags[num_buffer_dirty] = buf->tag;
  					num_buffer_dirty++;
! 					if (num_buffer_dirty >= max_buffers)
  						break;
  				}
  			}
  
  			cdb_id_t1 = StrategyCDB[cdb_id_t1].next;
  		}
  
  		if (cdb_id_t2 >= 0)
--- 780,794 ----
  					buffers[num_buffer_dirty] = buf;
  					buftags[num_buffer_dirty] = buf->tag;
  					num_buffer_dirty++;
! 					if (num_buffer_dirty >= max_buffer_dirty)
  						break;
  				}
  			}
  
  			cdb_id_t1 = StrategyCDB[cdb_id_t1].next;
+ 			num_buffer_scan++;
+ 			if (num_buffer_scan >= max_buffer_scan)
+ 				break;
  		}
  
  		if (cdb_id_t2 >= 0)
***************
*** 799,810 ****
  					buffers[num_buffer_dirty] = buf;
  					buftags[num_buffer_dirty] = buf->tag;
  					num_buffer_dirty++;
! 					if (num_buffer_dirty >= max_buffers)
  						break;
  				}
  			}
  
  			cdb_id_t2 = StrategyCDB[cdb_id_t2].next;
  		}
  	}
  
--- 803,817 ----
  					buffers[num_buffer_dirty] = buf;
  					buftags[num_buffer_dirty] = buf->tag;
  					num_buffer_dirty++;
! 					if (num_buffer_dirty >= max_buffer_dirty)
  						break;
  				}
  			}
  
  			cdb_id_t2 = StrategyCDB[cdb_id_t2].next;
+ 			num_buffer_scan++;
+ 			if (num_buffer_scan >= max_buffer_scan)
+ 				break;
  		}
  	}
  
Index: src/include/storage/buf_internals.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/buf_internals.h,v
retrieving revision 1.74
diff -d -c -r1.74 buf_internals.h
*** src/include/storage/buf_internals.h	16 Oct 2004 18:05:07 -0000	1.74
--- src/include/storage/buf_internals.h	1 Jan 2005 21:03:18 -0000
***************
*** 184,190 ****
  extern void StrategyInvalidateBuffer(BufferDesc *buf);
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
! 						int max_buffers);
  extern void StrategyInitialize(bool init);
  
  /* buf_table.c */
--- 184,190 ----
  extern void StrategyInvalidateBuffer(BufferDesc *buf);
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
! 						int max_buffer_dirty, int max_buffer_scan);
  extern void StrategyInitialize(bool init);
  
  /* buf_table.c */
---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to