When _bt_check_unique finds a dead item that has same data as new
item, LP_DEAD is set to the item. Can we reuse this dead item instead
of inserting new item? If it is possible, the growth of btree index
can be reduced.

I think it is effective in the table like branches table of pgbench to
which the same item is updated many times.

An attached patch is test implementation of this idea. This patch is
not complete, but it might be useful to show this idea.

regards,

--- Atsushi Ogawa
*** ./src/backend/access/nbtree/nbtinsert.c.orig	2005-10-08 09:57:33.316123472 +0900
--- ./src/backend/access/nbtree/nbtinsert.c	2005-10-08 11:44:39.725160800 +0900
***************
*** 40,46 ****
  
  static TransactionId _bt_check_unique(Relation rel, BTItem btitem,
  				 Relation heapRel, Buffer buf,
! 				 ScanKey itup_scankey);
  static void _bt_insertonpg(Relation rel, Buffer buf,
  			   BTStack stack,
  			   int keysz, ScanKey scankey,
--- 40,46 ----
  
  static TransactionId _bt_check_unique(Relation rel, BTItem btitem,
  				 Relation heapRel, Buffer buf,
! 				 ScanKey itup_scankey, OffsetNumber *reuse_off);
  static void _bt_insertonpg(Relation rel, Buffer buf,
  			   BTStack stack,
  			   int keysz, ScanKey scankey,
***************
*** 120,128 ****
  	 */
  	if (index_is_unique)
  	{
! 		TransactionId xwait;
  
! 		xwait = _bt_check_unique(rel, btitem, heapRel, buf, itup_scankey);
  
  		if (TransactionIdIsValid(xwait))
  		{
--- 120,130 ----
  	 */
  	if (index_is_unique)
  	{
! 		TransactionId	xwait;
! 		OffsetNumber	reuse_off = InvalidOffsetNumber;
  
! 		xwait = _bt_check_unique(rel, btitem, heapRel, buf, itup_scankey,
! 								 &reuse_off);
  
  		if (TransactionIdIsValid(xwait))
  		{
***************
*** 133,138 ****
--- 135,168 ----
  			_bt_freestack(stack);
  			goto top;
  		}
+ 
+ 		/*
+ 		 * When the reusable item was found, change TID of this item to
+ 		 * new value instead of executing _bt_insertonpg.
+ 		 */
+ 		if (reuse_off != InvalidOffsetNumber)
+ 		{
+ 			Page		page = BufferGetPage(buf);
+ 			ItemId		reuse_itemid = PageGetItemId(page, reuse_off);
+ 			BTItem		reuse_btitem = (BTItem)PageGetItem(page, reuse_itemid);
+ 
+ 			Assert(ItemIdDeleted(reuse_itemid));
+ 			Assert(_bt_isequal(RelationGetDescr(rel), page, reuse_off,
+ 							   natts, itup_scankey));
+ 
+ 			reuse_itemid->lp_flags &= ~LP_DELETE;
+ 			reuse_btitem->bti_itup.t_tid = btitem->bti_itup.t_tid;
+ 
+ 			_bt_wrtbuf(rel, buf);
+ 
+ 			/* be tidy */
+ 			_bt_freestack(stack);
+ 			_bt_freeskey(itup_scankey);
+ 
+ 			/* XXX: WAL stuff is necessary */
+ 
+ 			return;
+ 		}
  	}
  
  	/* do the insertion */
***************
*** 152,158 ****
   */
  static TransactionId
  _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
! 				 Buffer buf, ScanKey itup_scankey)
  {
  	TupleDesc	itupdesc = RelationGetDescr(rel);
  	int			natts = rel->rd_rel->relnatts;
--- 182,188 ----
   */
  static TransactionId
  _bt_check_unique(Relation rel, BTItem btitem, Relation heapRel,
! 				 Buffer buf, ScanKey itup_scankey, OffsetNumber *reuse_off)
  {
  	TupleDesc	itupdesc = RelationGetDescr(rel);
  	int			natts = rel->rd_rel->relnatts;
***************
*** 162,167 ****
--- 192,199 ----
  	BTPageOpaque opaque;
  	Buffer		nbuf = InvalidBuffer;
  
+ 	Assert(*reuse_off == InvalidOffsetNumber);
+ 
  	page = BufferGetPage(buf);
  	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
  	maxoff = PageGetMaxOffsetNumber(page);
***************
*** 262,272 ****
--- 294,330 ----
  					{
  						curitemid->lp_flags |= LP_DELETE;
  						SetBufferCommitInfoNeedsSave(buf);
+ 
+ 						if (*reuse_off == InvalidOffsetNumber &&
+ 							page == BufferGetPage(buf))
+ 						{
+ 							/*
+ 							 * The reusable item was found on the current
+ 							 * write locking page. 
+ 							 */
+ 							*reuse_off = offset;
+ 						}
  					}
  					LockBuffer(hbuffer, BUFFER_LOCK_UNLOCK);
  				}
  				ReleaseBuffer(hbuffer);
  			}
+ 			else
+ 			{
+ 				if (*reuse_off == InvalidOffsetNumber &&
+ 					page == BufferGetPage(buf))
+ 				{
+ 					if (!_bt_isequal(itupdesc, page, offset, natts,
+ 									 itup_scankey))
+ 						break;		/* we're past all the equal tuples */
+ 
+ 					/*
+ 					 * The reusable item was found on the current
+ 					 * write locking page. 
+ 					 */
+ 					*reuse_off = offset;
+ 				}
+ 			}
  		}
  
  		/*
---------------------------(end of broadcast)---------------------------
TIP 3: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faq

Reply via email to