Kevin Grittner <kgri...@ymail.com> wrote:
> Kevin Grittner <kgri...@ymail.com> wrote:
>
>> I'm strongly leaning toward the idea that a slightly tweaked
>> version of the proposed patch is appropriate for the back-branches,
>> because the fix Heikki is now suggesting seems too invasive to
>> back-patch.  I think it would make sense to apply it to master,
>> too, so that the new isolation tests can be immediately added.  We
>> can then work on the new approach for 9.4 and have the tests to
>> help confirm we are not breaking anything.  The tweak would be to
>> preserve the signature of heap_freeze_tuple(), because after the
>> more invasive fix in 9.4 the new parameters will not be needed.
>> They are only passed as non-NULL from one of the three callers, so
>> it seems best to leave those call sites alone rather than change
>> them back-and-forth.
>>
>> I will post a new patch today or tomorrow.
>
> Attached.

And here's a rough cut of what I think the alternative now
suggested by Heikki would look like.  (I've omitted the new
isolation test because that is the same as the previously posted
patch.)

Note this comment, which I think was written by Heikki back when
there was a lot more benchmarking and profiling of SSI going on:

  * Because a particular target might become obsolete, due to update to a new
  * version, before the reading transaction is obsolete, we need some way to
  * prevent errors from reuse of a tuple ID.  Rather than attempting to clean
  * up the targets as the related tuples are pruned or vacuumed, we check the
  * xmin on access.  This should be far less costly.

Based on what this patch looks like, I'm afraid he may have been
right when he wrote that.  In any event, after the exercise of
developing a first draft of searching for predicate locks to clean
up every time a tuple is pruned or vacuumed, I continue to feel
strongly that the previously-posted patch, which only takes action
when tuples are frozen by a vacuum process, is much more suitable
for backpatching.  I don't think we should switch to anything
resembling the attached without some careful benchmarking.

--
Kevin Grittner
EDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
*** a/src/backend/access/heap/pruneheap.c
--- b/src/backend/access/heap/pruneheap.c
***************
*** 21,26 ****
--- 21,27 ----
  #include "miscadmin.h"
  #include "pgstat.h"
  #include "storage/bufmgr.h"
+ #include "storage/predicate.h"
  #include "utils/rel.h"
  #include "utils/tqual.h"
  
***************
*** 52,58 **** static void heap_prune_record_redirect(PruneState *prstate,
  						   OffsetNumber offnum, OffsetNumber rdoffnum);
  static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
  static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
! 
  
  /*
   * Optionally prune and repair fragmentation in the specified page.
--- 53,62 ----
  						   OffsetNumber offnum, OffsetNumber rdoffnum);
  static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
  static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
! static void heap_prune_page_predicate_locks(Relation relation, Buffer buffer,
! 								OffsetNumber *redirected, int nredirected,
! 								OffsetNumber *nowdead, int ndead,
! 								OffsetNumber *nowunused, int nunused);
  
  /*
   * Optionally prune and repair fragmentation in the specified page.
***************
*** 200,205 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
--- 204,218 ----
  									 &prstate);
  	}
  
+ 	if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
+ 	{
+ 		heap_prune_page_predicate_locks(relation, buffer,
+ 										prstate.redirected,
+ 										prstate.nredirected,
+ 										prstate.nowdead, prstate.ndead,
+ 										prstate.nowunused, prstate.nunused);
+ 	}
+ 
  	/* Any error while applying the changes is critical */
  	START_CRIT_SECTION();
  
***************
*** 640,645 **** heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
--- 653,692 ----
  	prstate->marked[offnum] = true;
  }
  
+ /* Release predicate locks for now-unreachable tuples */
+ static void
+ heap_prune_page_predicate_locks(Relation relation, Buffer buffer,
+ 								OffsetNumber *redirected, int nredirected,
+ 								OffsetNumber *nowdead, int ndead,
+ 								OffsetNumber *nowunused, int nunused)
+ {
+ 	BlockNumber blkno = BufferGetBlockNumber(buffer);
+ 	OffsetNumber *offnum;
+ 	int			i;
+ 
+ 	/* Release predicate locks on all redirected line pointers */
+ 	offnum = redirected;
+ 	for (i = 0; i < nredirected; i++)
+ 	{
+ 		ReleasePredicateTupleLocks(relation, blkno, *offnum++);
+ 		offnum++;  /* skip redirect "to" offset */
+ 	}
+ 
+ 	/* Release predicate locks on all now-dead line pointers */
+ 	offnum = nowdead;
+ 	for (i = 0; i < ndead; i++)
+ 	{
+ 		ReleasePredicateTupleLocks(relation, blkno, *offnum++);
+ 	}
+ 
+ 	/* Release predicate locks on all now-unused line pointers */
+ 	offnum = nowunused;
+ 	for (i = 0; i < nunused; i++)
+ 	{
+ 		ReleasePredicateTupleLocks(relation, blkno, *offnum++);
+ 	}
+ }
+ 
  
  /*
   * Perform the actual page changes needed by heap_page_prune.
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 54,59 ****
--- 54,60 ----
  #include "storage/bufmgr.h"
  #include "storage/freespace.h"
  #include "storage/lmgr.h"
+ #include "storage/predicate.h"
  #include "utils/lsyscache.h"
  #include "utils/memutils.h"
  #include "utils/pg_rusage.h"
***************
*** 783,789 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
--- 784,795 ----
  						HeapTupleIsHeapOnly(&tuple))
  						nkeep += 1;
  					else
+ 					{
  						tupgone = true; /* we can delete the tuple */
+ 						ReleasePredicateTupleLocks(onerel,
+ 												   ItemPointerGetBlockNumber(&(tuple.t_self)),
+ 												   ItemPointerGetOffsetNumber(&(tuple.t_self)));
+ 					}
  					all_visible = false;
  					break;
  				case HEAPTUPLE_LIVE:
*** a/src/backend/storage/lmgr/predicate.c
--- b/src/backend/storage/lmgr/predicate.c
***************
*** 156,165 ****
   *		PredicateLockTuple(Relation relation, HeapTuple tuple,
   *						Snapshot snapshot)
   *		PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
!  *							   BlockNumber newblkno);
   *		PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
!  *								 BlockNumber newblkno);
   *		TransferPredicateLocksToHeapRelation(Relation relation)
   *		ReleasePredicateLocks(bool isCommit)
   *
   * conflict detection (may also trigger rollback)
--- 156,167 ----
   *		PredicateLockTuple(Relation relation, HeapTuple tuple,
   *						Snapshot snapshot)
   *		PredicateLockPageSplit(Relation relation, BlockNumber oldblkno,
!  *							   BlockNumber newblkno)
   *		PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
!  *								 BlockNumber newblkno)
   *		TransferPredicateLocksToHeapRelation(Relation relation)
+  *		ReleasePredicateTupleLocks(Relation relation, BlockNumber blkno,
+  *						   		   OffsetNumber offset)
   *		ReleasePredicateLocks(bool isCommit)
   *
   * conflict detection (may also trigger rollback)
***************
*** 381,387 **** static SHM_QUEUE *FinishedSerializableTransactions;
   * this entry, you can ensure that there's enough scratch space available for
   * inserting one entry in the hash table. This is an otherwise-invalid tag.
   */
! static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0, 0};
  static uint32 ScratchTargetTagHash;
  static int	ScratchPartitionLock;
  
--- 383,389 ----
   * this entry, you can ensure that there's enough scratch space available for
   * inserting one entry in the hash table. This is an otherwise-invalid tag.
   */
! static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
  static uint32 ScratchTargetTagHash;
  static int	ScratchPartitionLock;
  
***************
*** 2492,2499 **** PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot)
  			}
  		}
  	}
- 	else
- 		targetxmin = InvalidTransactionId;
  
  	/*
  	 * Do quick-but-not-definitive test for a relation lock first.	This will
--- 2494,2499 ----
***************
*** 2512,2519 **** PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snapshot)
  									 relation->rd_node.dbNode,
  									 relation->rd_id,
  									 ItemPointerGetBlockNumber(tid),
! 									 ItemPointerGetOffsetNumber(tid),
! 									 targetxmin);
  	PredicateLockAcquire(&tag);
  }
  
--- 2512,2518 ----
  									 relation->rd_node.dbNode,
  									 relation->rd_id,
  									 ItemPointerGetBlockNumber(tid),
! 									 ItemPointerGetOffsetNumber(tid));
  	PredicateLockAcquire(&tag);
  }
  
***************
*** 3170,3175 **** SetNewSxactGlobalXmin(void)
--- 3169,3223 ----
  }
  
  /*
+  *		ReleasePredicateTupleLocks
+  *
+  * Release a heap tuple lock target and all associated locks if the tuple has
+  * been found to no longer be a possible target for delete or update by any
+  * transaction -- such as during vacuum.
+  */
+ void
+ ReleasePredicateTupleLocks(Relation relation, BlockNumber blkno,
+ 						   OffsetNumber offset)
+ {
+ 	PREDICATELOCKTARGETTAG tag;
+ 	uint32		targettaghash;
+ 	LWLockId	partitionLock;
+ 	bool		found;
+ 	PREDICATELOCKTARGET *target;
+ 
+ 	SET_PREDICATELOCKTARGETTAG_TUPLE(tag,
+ 									 relation->rd_node.dbNode,
+ 									 relation->rd_id,
+ 									 blkno,
+ 									 offset);
+ 
+ 	targettaghash = PredicateLockTargetTagHashCode(&tag);
+ 
+ 	partitionLock = PredicateLockHashPartitionLock(targettaghash);
+ 
+ 	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
+ 	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+ 
+ 	/* Check whether any such tuple locks exist. */
+ 	target = (PREDICATELOCKTARGET *)
+ 		hash_search_with_hash_value(PredicateLockTargetHash,
+ 									&tag, targettaghash,
+ 									HASH_FIND, &found);
+ 	if (!found)
+ 	{
+ 		LWLockRelease(partitionLock);
+ 		LWLockRelease(SerializablePredicateLockListLock);
+ 		return;
+ 	}
+ 
+ 	/* Actually delete the lock target and all associated locks. */
+ 	DeleteLockTarget(target, targettaghash);
+ 
+ 	LWLockRelease(partitionLock);
+ 	LWLockRelease(SerializablePredicateLockListLock);
+ }
+ 
+ /*
   *		ReleasePredicateLocks
   *
   * Releases predicate locks based on completion of the current transaction,
***************
*** 4283,4290 **** CheckForSerializableConflictIn(Relation relation, HeapTuple tuple,
  										 relation->rd_node.dbNode,
  										 relation->rd_id,
  						 ItemPointerGetBlockNumber(&(tuple->t_data->t_ctid)),
! 						ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)),
! 									  HeapTupleHeaderGetXmin(tuple->t_data));
  		CheckTargetForConflictsIn(&targettag);
  	}
  
--- 4331,4337 ----
  										 relation->rd_node.dbNode,
  										 relation->rd_id,
  						 ItemPointerGetBlockNumber(&(tuple->t_data->t_ctid)),
! 						ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)));
  		CheckTargetForConflictsIn(&targettag);
  	}
  
*** a/src/include/storage/predicate.h
--- b/src/include/storage/predicate.h
***************
*** 52,57 **** extern void PredicateLockTuple(Relation relation, HeapTuple tuple, Snapshot snap
--- 52,58 ----
  extern void PredicateLockPageSplit(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
  extern void PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, BlockNumber newblkno);
  extern void TransferPredicateLocksToHeapRelation(Relation relation);
+ extern void ReleasePredicateTupleLocks(Relation relation, BlockNumber blkno, OffsetNumber offset);
  extern void ReleasePredicateLocks(bool isCommit);
  
  /* conflict detection (may also trigger rollback) */
*** a/src/include/storage/predicate_internals.h
--- b/src/include/storage/predicate_internals.h
***************
*** 254,263 **** typedef struct SERIALIZABLEXID
   * be the target of predicate locks.
   *
   * Note that the hash function being used doesn't properly respect tag
!  * length -- it will go to a four byte boundary past the end of the tag.
!  * If you change this struct, make sure any slack space is initialized,
!  * so that any random bytes in the middle or at the end are not included
!  * in the hash.
   *
   * TODO SSI: If we always use the same fields for the same type of value, we
   * should rename these.  Holding off until it's clear there are no exceptions.
--- 254,263 ----
   * be the target of predicate locks.
   *
   * Note that the hash function being used doesn't properly respect tag
!  * length -- if the length of the structure isn't a multiple of four bytes it
!  * will go to a four byte boundary past the end of the tag.  If you change
!  * this struct, make sure any slack space is initialized, so that any random
!  * bytes in the middle or at the end are not included in the hash.
   *
   * TODO SSI: If we always use the same fields for the same type of value, we
   * should rename these.  Holding off until it's clear there are no exceptions.
***************
*** 272,278 **** typedef struct PREDICATELOCKTARGETTAG
  	uint32		locktag_field2; /* a 32-bit ID field */
  	uint32		locktag_field3; /* a 32-bit ID field */
  	uint32		locktag_field4; /* a 32-bit ID field */
- 	uint32		locktag_field5; /* a 32-bit ID field */
  } PREDICATELOCKTARGETTAG;
  
  /*
--- 272,277 ----
***************
*** 283,294 **** typedef struct PREDICATELOCKTARGETTAG
   * added when a predicate lock is requested on an object which doesn't
   * already have one.  An entry is removed when the last lock is removed from
   * its list.
-  *
-  * Because a particular target might become obsolete, due to update to a new
-  * version, before the reading transaction is obsolete, we need some way to
-  * prevent errors from reuse of a tuple ID.  Rather than attempting to clean
-  * up the targets as the related tuples are pruned or vacuumed, we check the
-  * xmin on access.	This should be far less costly.
   */
  typedef struct PREDICATELOCKTARGET
  {
--- 282,287 ----
***************
*** 398,419 **** typedef struct PredicateLockData
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = InvalidBlockNumber, \
! 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
! 	 (locktag).locktag_field5 = InvalidTransactionId)
  
  #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = (blocknum), \
! 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
! 	 (locktag).locktag_field5 = InvalidTransactionId)
  
! #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = (blocknum), \
! 	 (locktag).locktag_field4 = (offnum), \
! 	 (locktag).locktag_field5 = (xmin))
  
  #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
  	((Oid) (locktag).locktag_field1)
--- 391,409 ----
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = InvalidBlockNumber, \
! 	 (locktag).locktag_field4 = InvalidOffsetNumber)
  
  #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = (blocknum), \
! 	 (locktag).locktag_field4 = InvalidOffsetNumber)
  
! #define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
  	((locktag).locktag_field1 = (dboid), \
  	 (locktag).locktag_field2 = (reloid), \
  	 (locktag).locktag_field3 = (blocknum), \
! 	 (locktag).locktag_field4 = (offnum))
  
  #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
  	((Oid) (locktag).locktag_field1)
***************
*** 423,430 **** typedef struct PredicateLockData
  	((BlockNumber) (locktag).locktag_field3)
  #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
  	((OffsetNumber) (locktag).locktag_field4)
- #define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
- 	((TransactionId) (locktag).locktag_field5)
  #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag)							 \
  	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
  	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE :   \
--- 413,418 ----
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to