On 1 May 2011 I wrote: > Consider this a WIP patch Just so people know where this stands... By 8 May 2011 I had the attached. I didn't post it because I was not confident I had placed the calls to the SSI functions for DROP TABLE and TRUNCATE TABLE correctly. Then came PGCon and also the row versus tuple discussion -- the patch for which had conflicts with this one. I will be reviewing this on the weekend and making any final adjustments, but the patch is very likely to look just like this with the possible exception of placement of the DROP TABLE and TRUNCATE TABLE calls. If anyone wants to start reviewing this now, it would leave more time for me to make changes if needed. Also, if anyone has comments or hints about the placement of those calls, I'd be happy to receive them. This patch compiles with `make world`, passes `make check-world`, and passes both `make installcheck-world` and `make -C src/test/isolation installcheck` against a database with default_transaction_isolation = 'serializable'. It also passed a few ad hoc tests of the implemented functionality. -Kevin
*** a/src/backend/catalog/heap.c --- b/src/backend/catalog/heap.c *************** *** 1658,1663 **** heap_drop_with_catalog(Oid relid) --- 1658,1671 ---- CheckTableNotInUse(rel, "DROP TABLE"); /* + * This effectively deletes all rows in the table, and may be done in a + * serializable transaction. In that case we must record a rw-conflict in + * to this transaction from each transaction holding a predicate lock on + * the table. + */ + CheckTableForSerializableConflictIn(rel); + + /* * Delete pg_foreign_table tuple first. */ if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) *************** *** 1688,1693 **** heap_drop_with_catalog(Oid relid) --- 1696,1706 ---- } /* + * Clean up any remaining predicate locks on the relation. + */ + DropAllPredicateLocksFromTable(rel); + + /* * Close relcache entry, but *keep* AccessExclusiveLock on the relation * until transaction commit. This ensures no one else will try to do * something with the doomed relation. *** a/src/backend/catalog/index.c --- b/src/backend/catalog/index.c *************** *** 54,59 **** --- 54,60 ---- #include "parser/parser.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" + #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" *************** *** 1311,1316 **** index_drop(Oid indexId) --- 1312,1323 ---- CheckTableNotInUse(userIndexRelation, "DROP INDEX"); /* + * All predicate locks on the index are about to be made invalid. + * Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(userIndexRelation); + + /* * Schedule physical removal of the files */ RelationDropStorage(userIndexRelation); *************** *** 2787,2792 **** reindex_index(Oid indexId, bool skip_constraint_checks) --- 2794,2805 ---- */ CheckTableNotInUse(iRel, "REINDEX INDEX"); + /* + * All predicate locks on the index are about to be made invalid. + * Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(iRel); + PG_TRY(); { /* Suppress use of the target index while rebuilding it */ *** a/src/backend/commands/cluster.c --- b/src/backend/commands/cluster.c *************** *** 39,44 **** --- 39,45 ---- #include "optimizer/planner.h" #include "storage/bufmgr.h" #include "storage/lmgr.h" + #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/acl.h" *************** *** 385,390 **** cluster_rel(Oid tableOid, Oid indexOid, bool recheck, bool verbose, --- 386,397 ---- if (OidIsValid(indexOid)) check_index_is_clusterable(OldHeap, indexOid, recheck, AccessExclusiveLock); + /* + * All predicate locks on the table and its indexes are about to be made + * invalid. Promote them to relation locks on the heap. + */ + TransferPredicateLocksToHeapRelation(OldHeap); + /* rebuild_relation does all the dirty work */ rebuild_relation(OldHeap, indexOid, freeze_min_age, freeze_table_age, verbose); *** a/src/backend/storage/lmgr/predicate.c --- b/src/backend/storage/lmgr/predicate.c *************** *** 155,160 **** --- 155,162 ---- * BlockNumber newblkno); * PredicateLockPageCombine(Relation relation, BlockNumber oldblkno, * BlockNumber newblkno); + * TransferPredicateLocksToHeapRelation(const Relation relation) + * DropAllPredicateLocksFromTable(const Relation relation) * ReleasePredicateLocks(bool isCommit) * * conflict detection (may also trigger rollback) *************** *** 162,167 **** --- 164,170 ---- * HeapTupleData *tup, Buffer buffer) * CheckForSerializableConflictIn(Relation relation, HeapTupleData *tup, * Buffer buffer) + * CheckTableForSerializableConflictIn(const Relation relation) * * final rollback checking * PreCommit_CheckForSerializationFailure(void) *************** *** 189,194 **** --- 192,198 ---- #include "storage/procarray.h" #include "utils/rel.h" #include "utils/snapmgr.h" + #include "utils/syscache.h" #include "utils/tqual.h" /* Uncomment the next line to test the graceful degradation code. */ *************** *** 434,439 **** static bool TransferPredicateLocksToNewTarget(const PREDICATELOCKTARGETTAG oldta --- 438,445 ---- const PREDICATELOCKTARGETTAG newtargettag, bool removeOld); static void PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag); + static Oid IfIndexGetRelation(Oid indexId); + static void DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer); static void SetNewSxactGlobalXmin(void); static bool ReleasePredicateLocksIfROSafe(void); static void ClearOldPredicateLocks(void); *************** *** 2543,2548 **** exit: --- 2549,2863 ---- return !outOfShmem; } + /* + * IfIndexGetRelation: given a relation OID, get the OID of the heap + * relation it is an index on, or return InvalidOid if the argument is not + * an index. Uses the system cache. + */ + static Oid + IfIndexGetRelation(Oid indexId) + { + HeapTuple tuple; + Form_pg_index index; + Oid result; + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId)); + if (!HeapTupleIsValid(tuple)) + return InvalidOid; + + index = (Form_pg_index) GETSTRUCT(tuple); + Assert(index->indexrelid == indexId); + + result = index->indrelid; + ReleaseSysCache(tuple); + return result; + } + + /* + * Drop all predicate locks of any granularity from a heap relation and all of + * its indexes, with optional transfer to a relation-level lock on the heap. + * + * This requires grabbing a lot of LW locks and scanning the entire lock + * target table for matches. That makes this more expensive than most + * predicate lock management functions, but it will only be called for DDL + * type commands and there are fast returns when no serializable transactions + * are active or the relation is temporary. + * + * We are not using the TransferPredicateLocksToNewTarget function because + * it acquires its own locks on the partitions of the two targets invovled, + * and we'll already be holding all partition locks. + * + * We can't throw an error from here, because the call could be from a + * transaction which is not serializable. + */ + static void + DropAllPredicateLocksFromTableImpl(const Relation relation, bool transfer) + { + HASH_SEQ_STATUS seqstat; + PREDICATELOCKTARGET *oldtarget; + PREDICATELOCKTARGET *heaptarget; + PREDICATELOCKTARGETTAG heaptargettag; + PREDICATELOCKTAG newpredlocktag; + Oid dbId; + Oid indexId; + Oid heapId; + int i; + bool isSingleIndex; + bool found; + uint32 reservedtargettaghash; + uint32 heaptargettaghash; + + /* + * Bail out quickly if there are no serializable transactions running. + * It's safe to check this without taking locks because the caller is + * holding an ACCESS EXCLUSIVE lock on the relation. No new locks + * which would matter here can be acquired while that is held. + */ + if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + return; + + if (SkipSplitTracking(relation)) + return; + + dbId = relation->rd_node.dbNode; + if (relation->rd_index == NULL) + { + isSingleIndex = false; + indexId = InvalidOid; /* quiet compiler warning */ + heapId = relation->rd_id; + } + else + { + isSingleIndex = true; + indexId = relation->rd_id; + heapId = relation->rd_index->indrelid; + } + Assert(heapId != InvalidOid); + Assert(transfer || !isSingleIndex); /* index OID only makes sense with transfer */ + + SET_PREDICATELOCKTARGETTAG_RELATION(heaptargettag, dbId, heapId); + heaptargettaghash = PredicateLockTargetTagHashCode(&heaptargettag); + heaptarget = NULL; /* Retrieve first time needed, then keep. */ + + LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); + for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) + LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE); + LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE); + + /* + * If there are any locks to be moved, that means there we will wind up + * with a relation lock on the heap. That covers all other locks on the + * heap and all locks on all indexes for the table, so we can be very + * aggressive about deleting any locks except for the heap relation lock. + * But we don't want to add a heap relation lock unless there is at least + * one lock that needs to be transferred. If this function is called + * with an index OID, we'll first scan to see if there are any predicate + * locks on that index. As soon as we find one we drop down into the + * update loop. If we find none, we release the LW locks and return + * without changing anything. + * + * This optimization comes into play two ways -- a REINDEX might be done + * on an index which has no predicate locks, or an operation might be done + * which rewrites the entire table and calls REINDEX on each index. In the + * latter case the action against the base table will move all the index + * locks before any of the index rebuilds are requested. + */ + if (isSingleIndex) + { + bool foundIndexLock = false; + + hash_seq_init(&seqstat, PredicateLockTargetHash); + while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat))) + { + if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) != indexId) + continue; /* wrong OID for the index */ + if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId) + continue; /* wrong database */ + foundIndexLock = true; + hash_seq_term(&seqstat); + break; + } + if (!foundIndexLock) + { + /* Release locks in reverse order */ + LWLockRelease(SerializableXactHashLock); + for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(FirstPredicateLockMgrLock + i); + LWLockRelease(SerializablePredicateLockListLock); + return; + } + } + + /* + * Remove the reserved entry to give us scratch space, so we know we'll be + * able to create the new lock target. + */ + reservedtargettaghash = 0; /* quiet compiler warning */ + if (transfer) + { + reservedtargettaghash = PredicateLockTargetTagHashCode(&ReservedTargetTag); + hash_search_with_hash_value(PredicateLockTargetHash, + &ReservedTargetTag, + reservedtargettaghash, + HASH_REMOVE, &found); + Assert(found); + } + + /* Scan through PredicateLockHash and copy contents */ + hash_seq_init(&seqstat, PredicateLockTargetHash); + + while ((oldtarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat))) + { + PREDICATELOCK *oldpredlock; + + /* + * Check whether this is a target which needs attention. + */ + if (GET_PREDICATELOCKTARGETTAG_DB(oldtarget->tag) != dbId) + continue; /* wrong database */ + if (GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag) == heapId) + { + if (GET_PREDICATELOCKTARGETTAG_TYPE(oldtarget->tag) == PREDLOCKTAG_RELATION) + continue; /* already the right lock */ + } + else + { + if (IfIndexGetRelation(GET_PREDICATELOCKTARGETTAG_RELATION(oldtarget->tag)) != heapId) + continue; /* not index or index on wrong heap relation */ + } + + /* + * If we made it here, we have work to do. We make sure the heap + * relation lock exists, then we walk the list of predicate locks for + * the old target we found, moving all locks to the heap relation lock + * -- unless they already hold that. + */ + + /* + * First make sure we have the heap relation target. We only need to + * do this once. + */ + if (transfer && heaptarget == NULL) + { + heaptarget = hash_search_with_hash_value(PredicateLockTargetHash, + &heaptargettag, + heaptargettaghash, + HASH_ENTER, &found); + Assert(heaptarget != NULL); + if (!found) + SHMQueueInit(&heaptarget->predicateLocks); + newpredlocktag.myTarget = heaptarget; + } + + /* + * Loop through moving locks from this target to the relation target. + */ + oldpredlock = (PREDICATELOCK *) + SHMQueueNext(&(oldtarget->predicateLocks), + &(oldtarget->predicateLocks), + offsetof(PREDICATELOCK, targetLink)); + while (oldpredlock) + { + PREDICATELOCK *nextpredlock; + PREDICATELOCK *newpredlock; + SerCommitSeqNo oldCommitSeqNo = oldpredlock->commitSeqNo; + + nextpredlock = (PREDICATELOCK *) + SHMQueueNext(&(oldtarget->predicateLocks), + &(oldpredlock->targetLink), + offsetof(PREDICATELOCK, targetLink)); + newpredlocktag.myXact = oldpredlock->tag.myXact; + + /* + * It's OK ot remove the old lock first because of the ACCESS + * EXCLUSIVE lock on the heap relation when this is called. It is + * desirable to do so because it avoids any chance of running out + * of lock structure entries for the table. + */ + SHMQueueDelete(&(oldpredlock->xactLink)); + /* No need for retail delete from oldtarget list. */ + hash_search(PredicateLockHash, + &oldpredlock->tag, + HASH_REMOVE, &found); + Assert(found); + + if (transfer) + { + newpredlock = (PREDICATELOCK *) + hash_search_with_hash_value + (PredicateLockHash, + &newpredlocktag, + PredicateLockHashCodeFromTargetHashCode(&newpredlocktag, + heaptargettaghash), + HASH_ENTER_NULL, &found); + Assert(newpredlock != NULL); + if (!found) + { + SHMQueueInsertBefore(&(heaptarget->predicateLocks), + &(newpredlock->targetLink)); + SHMQueueInsertBefore(&(newpredlocktag.myXact->predicateLocks), + &(newpredlock->xactLink)); + newpredlock->commitSeqNo = oldCommitSeqNo; + } + else + { + if (newpredlock->commitSeqNo < oldCommitSeqNo) + newpredlock->commitSeqNo = oldCommitSeqNo; + } + + Assert(newpredlock->commitSeqNo != 0); + Assert((newpredlock->commitSeqNo == InvalidSerCommitSeqNo) + || (newpredlock->tag.myXact == OldCommittedSxact)); + } + + oldpredlock = nextpredlock; + } + + hash_search(PredicateLockTargetHash, &oldtarget->tag, HASH_REMOVE, &found); + Assert(found); + } + + if (transfer) + { + /* Put the reserved entry back */ + hash_search_with_hash_value(PredicateLockTargetHash, + &ReservedTargetTag, + reservedtargettaghash, + HASH_ENTER, &found); + Assert(!found); + } + + /* Release locks in reverse order */ + LWLockRelease(SerializableXactHashLock); + for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(FirstPredicateLockMgrLock + i); + LWLockRelease(SerializablePredicateLockListLock); + } + + /* + * TransferPredicateLocksToHeapRelation + * For all transactions, transfer all predicate locks for the given + * relation to a single relation lock on the heap. For a heap relation + * that includes all locks on indexes; for an index the same locks moves + * are needed, but only if one or more locks exists on that index. + */ + void + TransferPredicateLocksToHeapRelation(const Relation relation) + { + DropAllPredicateLocksFromTableImpl(relation, true); + } + + /* + * DropAllPredicateLocksFromTable + * For all transactions, drop all predicate locks for the given table. + * That includes all locks on its indexes. + */ + void + DropAllPredicateLocksFromTable(const Relation relation) + { + DropAllPredicateLocksFromTableImpl(relation, false); + } + /* * PredicateLockPageSplit *************** *** 3792,3797 **** CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple, --- 4107,4208 ---- } /* + * CheckTableForSerializableConflictIn + * The entire table is going through a DDL-style logical mass write (like + * TRUNCATE TABLE or DROP TABLE. While these operations do not operate + * entirely within the bounds of snapshot isolation, they can occur + * inside of a serialziable transaction, and will logically occur after + * any reads which saw rows which were destroyed by these operations, so + * we do what we can to serialize properly under SSI. + * + * The relation passed in must be a heap relation for a table. Any predicate + * lock of any granularity on the table or any of its indexes will cause a + * rw-conflict in to this transaction. + * + * This should be done before altering the predicate locks because the + * transaction could be rolled back because of a conflict, in which case the + * lock changes are not needed. + */ + void + CheckTableForSerializableConflictIn(const Relation relation) + { + HASH_SEQ_STATUS seqstat; + PREDICATELOCKTARGET *target; + Oid dbId; + Oid heapId; + int i; + + /* + * Bail out quickly if there are no serializable transactions running. + * It's safe to check this without taking locks because the caller is + * holding an ACCESS EXCLUSIVE lock on the relation. No new locks + * which would matter here can be acquired while that is held. + */ + if (!TransactionIdIsValid(PredXact->SxactGlobalXmin)) + return; + + if (SkipSerialization(relation)) + return; + + Assert(relation->rd_index == NULL); + + dbId = relation->rd_node.dbNode; + heapId = relation->rd_id; + + LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE); + for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++) + LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED); + LWLockAcquire(SerializableXactHashLock, LW_SHARED); + + /* Scan through PredicateLockHash and copy contents */ + hash_seq_init(&seqstat, PredicateLockTargetHash); + + while ((target = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat))) + { + PREDICATELOCK *predlock; + + /* + * Check whether this is a target which needs attention. + */ + if (GET_PREDICATELOCKTARGETTAG_DB(target->tag) != dbId) + continue; /* wrong database */ + if (GET_PREDICATELOCKTARGETTAG_RELATION(target->tag) != heapId + && IfIndexGetRelation(GET_PREDICATELOCKTARGETTAG_RELATION(target->tag)) != heapId) + continue; /* not index or index on wrong heap relation */ + + /* + * Loop through locks for this target and flag conflicts. + */ + predlock = (PREDICATELOCK *) + SHMQueueNext(&(target->predicateLocks), + &(target->predicateLocks), + offsetof(PREDICATELOCK, targetLink)); + while (predlock) + { + PREDICATELOCK *nextpredlock; + + nextpredlock = (PREDICATELOCK *) + SHMQueueNext(&(target->predicateLocks), + &(predlock->targetLink), + offsetof(PREDICATELOCK, targetLink)); + + if (predlock->tag.myXact != MySerializableXact + && !RWConflictExists(predlock->tag.myXact, (SERIALIZABLEXACT *) MySerializableXact)) + FlagRWConflict(predlock->tag.myXact, (SERIALIZABLEXACT *) MySerializableXact); + + predlock = nextpredlock; + } + } + + /* Release locks in reverse order */ + LWLockRelease(SerializableXactHashLock); + for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--) + LWLockRelease(FirstPredicateLockMgrLock + i); + LWLockRelease(SerializablePredicateLockListLock); + } + + + /* * Flag a rw-dependency between two serializable transactions. * * The caller is responsible for ensuring that we have a LW lock on *** a/src/include/storage/predicate.h --- b/src/include/storage/predicate.h *************** *** 49,59 **** extern void PredicateLockPage(const Relation relation, const BlockNumber blkno); --- 49,62 ---- extern void PredicateLockTuple(const Relation relation, const HeapTuple tuple); extern void PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno); extern void PredicateLockPageCombine(const Relation relation, const BlockNumber oldblkno, const BlockNumber newblkno); + extern void TransferPredicateLocksToHeapRelation(const Relation relation); + extern void DropAllPredicateLocksFromTable(const Relation relation); extern void ReleasePredicateLocks(const bool isCommit); /* conflict detection (may also trigger rollback) */ extern void CheckForSerializableConflictOut(const bool valid, const Relation relation, const HeapTuple tuple, const Buffer buffer); extern void CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple, const Buffer buffer); + extern void CheckTableForSerializableConflictIn(const Relation relation); /* final rollback checking */ extern void PreCommit_CheckForSerializationFailure(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers