On Fri, Sep 8, 2017 at 4:32 AM, Masahiko Sawada <sawada.m...@gmail.com> wrote: > On Fri, Sep 8, 2017 at 7:24 AM, Thomas Munro > <thomas.mu...@enterprisedb.com> wrote: >> On Wed, Aug 16, 2017 at 2:13 PM, Masahiko Sawada <sawada.m...@gmail.com> >> wrote: >>> The previous patch conflicts with current HEAD, I rebased the patch to >>> current HEAD. >> >> Hi Masahiko-san, >> >> FYI this doesn't build anymore. I think it's just because the wait >> event enumerators were re-alphabetised in pgstat.h: >> >> ../../../../src/include/pgstat.h:820:2: error: redeclaration of >> enumerator ‘WAIT_EVENT_LOGICAL_SYNC_DATA’ >> WAIT_EVENT_LOGICAL_SYNC_DATA, >> ^ >> ../../../../src/include/pgstat.h:806:2: note: previous definition of >> ‘WAIT_EVENT_LOGICAL_SYNC_DATA’ was here >> WAIT_EVENT_LOGICAL_SYNC_DATA, >> ^ >> ../../../../src/include/pgstat.h:821:2: error: redeclaration of >> enumerator ‘WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE’ >> WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE, >> ^ >> ../../../../src/include/pgstat.h:807:2: note: previous definition of >> ‘WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE’ was here >> WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE, >> ^ >> > > Thank you for the information! Attached rebased patch. >
Since the previous patch conflicts with the current HEAD, I have attached an updated patch for the next CF. Regards, -- Masahiko Sawada NIPPON TELEGRAPH AND TELEPHONE CORPORATION NTT Open Source Software Center
diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index 80f803e..b928c1a 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -609,8 +609,8 @@ brin_page_cleanup(Relation idxrel, Buffer buf) */ if (PageIsNew(page)) { - LockRelationForExtension(idxrel, ShareLock); - UnlockRelationForExtension(idxrel, ShareLock); + LockRelationForExtension(idxrel, RELEXT_SHARED); + UnlockRelationForExtension(idxrel, RELEXT_SHARED); LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (PageIsNew(page)) @@ -702,7 +702,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, */ if (!RELATION_IS_LOCAL(irel)) { - LockRelationForExtension(irel, ExclusiveLock); + LockRelationForExtension(irel, RELEXT_EXCLUSIVE); extensionLockHeld = true; } buf = ReadBuffer(irel, P_NEW); @@ -754,7 +754,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, } if (extensionLockHeld) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE); ReleaseBuffer(buf); return InvalidBuffer; @@ -764,7 +764,7 @@ brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz, LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); if (extensionLockHeld) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE); page = BufferGetPage(buf); diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 22f2076..4c15b45 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -570,7 +570,7 @@ revmap_physical_extend(BrinRevmap *revmap) else { if (needLock) - LockRelationForExtension(irel, ExclusiveLock); + LockRelationForExtension(irel, RELEXT_EXCLUSIVE); buf = ReadBuffer(irel, P_NEW); if (BufferGetBlockNumber(buf) != mapBlk) @@ -582,7 +582,7 @@ revmap_physical_extend(BrinRevmap *revmap) * page from under whoever is using it. 
*/ if (needLock) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE); LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); return; @@ -591,7 +591,7 @@ revmap_physical_extend(BrinRevmap *revmap) page = BufferGetPage(buf); if (needLock) - UnlockRelationForExtension(irel, ExclusiveLock); + UnlockRelationForExtension(irel, RELEXT_EXCLUSIVE); } /* Check that it's a regular block (or an empty page) */ diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 136ea27..1690d21 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -325,13 +325,13 @@ GinNewBuffer(Relation index) /* Must extend the file */ needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, RELEXT_EXCLUSIVE); buffer = ReadBuffer(index, P_NEW); LockBuffer(buffer, GIN_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, RELEXT_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 31425e9..e9f84bc 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -716,10 +716,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, RELEXT_EXCLUSIVE); npages = RelationGetNumberOfBlocks(index); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, RELEXT_EXCLUSIVE); totFreePages = 0; @@ -766,10 +766,10 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) stats->pages_free = totFreePages; if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, RELEXT_EXCLUSIVE); stats->num_pages = 
RelationGetNumberOfBlocks(index); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, RELEXT_EXCLUSIVE); return stats; } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 26d89f7..cd351d8 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -821,13 +821,13 @@ gistNewBuffer(Relation r) needLock = !RELATION_IS_LOCAL(r); if (needLock) - LockRelationForExtension(r, ExclusiveLock); + LockRelationForExtension(r, RELEXT_EXCLUSIVE); buffer = ReadBuffer(r, P_NEW); LockBuffer(buffer, GIST_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(r, ExclusiveLock); + UnlockRelationForExtension(r, RELEXT_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 77d9d12..ca45b06 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -59,10 +59,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* try to find deleted pages */ if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); npages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); totFreePages = 0; for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++) @@ -91,10 +91,10 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) /* return statistics */ stats->pages_free = totFreePages; if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); stats->num_pages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); return stats; } diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 13e3bdc..a8ce6c7 100644 --- 
a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -519,11 +519,11 @@ loop: if (needLock) { if (!use_fsm) - LockRelationForExtension(relation, ExclusiveLock); - else if (!ConditionalLockRelationForExtension(relation, ExclusiveLock)) + LockRelationForExtension(relation, RELEXT_EXCLUSIVE); + else if (!ConditionalLockRelationForExtension(relation, RELEXT_EXCLUSIVE)) { /* Couldn't get the lock immediately; wait for it. */ - LockRelationForExtension(relation, ExclusiveLock); + LockRelationForExtension(relation, RELEXT_EXCLUSIVE); /* * Check if some other backend has extended a block for us while @@ -537,7 +537,7 @@ loop: */ if (targetBlock != InvalidBlockNumber) { - UnlockRelationForExtension(relation, ExclusiveLock); + UnlockRelationForExtension(relation, RELEXT_EXCLUSIVE); goto loop; } @@ -576,7 +576,7 @@ loop: * against vacuumlazy.c --- see comments therein. */ if (needLock) - UnlockRelationForExtension(relation, ExclusiveLock); + UnlockRelationForExtension(relation, RELEXT_EXCLUSIVE); /* * We need to initialize the empty new page. Double-check that it really diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 4c2a13a..7dc3088 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -641,7 +641,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) * Note that another backend might have extended or created the relation * by the time we get the lock. 
*/ - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); /* Might have to re-open if a cache flush happened */ RelationOpenSmgr(rel); @@ -679,7 +679,7 @@ vm_extend(Relation rel, BlockNumber vm_nblocks) /* Update local cache with the up-to-date size */ rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); pfree(pg); } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 10697e9..e1407ac 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -658,7 +658,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) needLock = !RELATION_IS_LOCAL(rel); if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); buf = ReadBuffer(rel, P_NEW); @@ -672,7 +672,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) * condition against btvacuumscan --- see comments therein. 
*/ if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); /* Initialize the new page before returning it */ page = BufferGetPage(buf); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 3dbafdd..394a660 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1058,10 +1058,10 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, { /* Get the current relation length */ if (needLock) - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); num_pages = RelationGetNumberOfBlocks(rel); if (needLock) - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c index 22f64b0..12be667 100644 --- a/src/backend/access/spgist/spgutils.c +++ b/src/backend/access/spgist/spgutils.c @@ -230,13 +230,13 @@ SpGistNewBuffer(Relation index) /* Must extend the file */ needLock = !RELATION_IS_LOCAL(index); if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, RELEXT_EXCLUSIVE); buffer = ReadBuffer(index, P_NEW); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, RELEXT_EXCLUSIVE); return buffer; } diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c index d7d5e90..3888d93 100644 --- a/src/backend/access/spgist/spgvacuum.c +++ b/src/backend/access/spgist/spgvacuum.c @@ -824,10 +824,10 @@ spgvacuumscan(spgBulkDeleteState *bds) { /* Get the current relation length */ if (needLock) - LockRelationForExtension(index, ExclusiveLock); + LockRelationForExtension(index, RELEXT_EXCLUSIVE); num_pages = RelationGetNumberOfBlocks(index); 
if (needLock) - UnlockRelationForExtension(index, ExclusiveLock); + UnlockRelationForExtension(index, RELEXT_EXCLUSIVE); /* Quit if we've scanned the whole relation */ if (blkno >= num_pages) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 30b1c08..443e230 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -849,8 +849,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * it's got exclusive lock on the whole relation. */ LockBuffer(buf, BUFFER_LOCK_UNLOCK); - LockRelationForExtension(onerel, ExclusiveLock); - UnlockRelationForExtension(onerel, ExclusiveLock); + LockRelationForExtension(onerel, RELEXT_EXCLUSIVE); + UnlockRelationForExtension(onerel, RELEXT_EXCLUSIVE); LockBufferForCleanup(buf); if (PageIsNew(page)) { diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 3a0b49c..64e26df 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -3628,6 +3628,9 @@ pgstat_get_wait_ipc(WaitEventIPC w) case WAIT_EVENT_SYNC_REP: event_name = "SyncRep"; break; + case WAIT_EVENT_RELATION_EXTENSION: + event_name = "RelationExtension"; + break; /* no default case, so that compiler will warn */ } diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 4648473..498223a 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -624,7 +624,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) * Note that another backend might have extended or created the relation * by the time we get the lock. 
*/ - LockRelationForExtension(rel, ExclusiveLock); + LockRelationForExtension(rel, RELEXT_EXCLUSIVE); /* Might have to re-open if a cache flush happened */ RelationOpenSmgr(rel); @@ -652,7 +652,7 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks) /* Update local cache with the up-to-date size */ rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now; - UnlockRelationForExtension(rel, ExclusiveLock); + UnlockRelationForExtension(rel, RELEXT_EXCLUSIVE); pfree(pg); } diff --git a/src/backend/storage/lmgr/Makefile b/src/backend/storage/lmgr/Makefile index e1b787e..2334a40 100644 --- a/src/backend/storage/lmgr/Makefile +++ b/src/backend/storage/lmgr/Makefile @@ -13,7 +13,7 @@ top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global OBJS = lmgr.o lock.o proc.o deadlock.o lwlock.o lwlocknames.o spin.o \ - s_lock.o predicate.o condition_variable.o + s_lock.o predicate.o condition_variable.o extension_lock.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/lmgr/extension_lock.c b/src/backend/storage/lmgr/extension_lock.c new file mode 100644 index 0000000..e8bbd5a --- /dev/null +++ b/src/backend/storage/lmgr/extension_lock.c @@ -0,0 +1,380 @@ +/*------------------------------------------------------------------------- + * + * extension_lock.c + * Relation extension lock manager + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/lmgr/extension_lock.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" +#include "pgstat.h" +#include "pg_trace.h" +#include "postmaster/postmaster.h" +#include "replication/slot.h" +#include "storage/ipc.h" +#include "storage/proc.h" +#include "storage/proclist.h" +#include "storage/spin.h" +#include "storage/extension_lock.h" +#include "utils/memutils.h" +#include "utils/rel.h" + 
+#ifdef LWLOCK_STATS
+#include "utils/hsearch.h"
+#endif
+
+/*
+ * Compute the hash code associated with a RELEXTLOCK.
+ *
+ * To avoid unnecessary recomputations of the hash code, we try to do this
+ * just once per function, and then pass it around as needed.  Aside from
+ * passing the hashcode to hash_search_with_hash_value(), we can extract
+ * the lock partition number from the hashcode.
+ *
+ * The argument is a pointer to the key (an Oid), not the key itself.
+ */
+#define RelExtLockTargetTagHashCode(relextlocktargettag) \
+	get_hash_value(RelExtLockHash, (const void *) (relextlocktargettag))
+
+/*
+ * The relation extension lock hash table is partitioned to reduce contention.
+ * To determine which partition a given relid belongs to, compute the key's
+ * hash code with RelExtLockTargetTagHashCode(), then apply one of these
+ * macros.
+ * NB: NUM_RELEXTLOCK_PARTITIONS must be a power of 2!
+ */
+#define RelExtLockHashPartition(hashcode) \
+	((hashcode) % NUM_RELEXTLOCK_PARTITIONS)
+/* Use this module's own partition count, not the heavyweight lock manager's */
+#define RelExtLockHashPartitionLock(hashcode) \
+	(&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + \
+		RelExtLockHashPartition(hashcode)].lock)
+#define RelExtLockHashPartitionLockByIndex(i) \
+	(&MainLWLockArray[RELEXTLOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
+
+/* Amounts added to / subtracted from RelExtLock.state per holder of a mode */
+#define RELEXT_VAL_EXCLUSIVE	((uint32) 1 << 24)
+#define RELEXT_VAL_SHARED		1
+
+/* Bits of the state word tested to decide whether the lock is free */
+#define RELEXT_LOCKMASK			((uint32) ((1 << 25) - 1))
+
+/* Number of slots in the per-backend held_relextlocks array */
+#define MAX_SIMUL_EXTLOCKS	8
+
+/*
+ * This structure holds information per-object relation extension
+ * lock. held_relextlocks represents the RelExtLocks we're holding.
+ */
+typedef struct relextlock_handle
+{
+	Oid				relid;		/* relation the lock was taken on */
+	RelExtLock	   *lock;		/* entry in the shared RelExtLockHash table */
+	RelExtLockMode	mode;		/* lock mode for this table entry */
+} relextlock_handle;
+static relextlock_handle held_relextlocks[MAX_SIMUL_EXTLOCKS];
+static int num_held_relextlocks = 0;
+
+static bool RelExtLockAcquire(Oid relid, RelExtLockMode lockmode, bool conditional);
+static void RelExtLockRelease(Oid relid, RelExtLockMode lockmode);
+static bool RelExtLockAttemptLock(RelExtLock *ext_lock, RelExtLockMode lockmode);
+
+/*
+ * Pointers to hash tables containing lock state
+ *
+ * The RelExtLockHash hash table is in shared memory
+ */
+static HTAB *RelExtLockHash;
+
+/*
+ * InitRelExtLock
+ *		Initialize the relation extension lock manager's data structures.
+ *
+ * max_table_size is passed to ShmemInitHash as the maximum number of
+ * entries; the initial size requested is half of that.
+ *
+ * NOTE(review): the hash key is the relation OID alone, whereas the removed
+ * SET_LOCKTAG_RELATION_EXTEND-based code keyed on (database, relation).
+ * Confirm that relations with equal OIDs in different databases sharing one
+ * entry is acceptable.
+ */
+void
+InitRelExtLock(long max_table_size)
+{
+	HASHCTL		info;
+	long		init_table_size;
+
+	/*
+	 * Compute init/max size to request for lock hashtables.  Note these
+	 * calculations must agree with LockShmemSize!
+	 */
+	init_table_size = max_table_size / 2;
+
+	/*
+	 * Allocate hash table for RELEXTLOCK structs.  This stores per-relation
+	 * lock.
+	 */
+	MemSet(&info, 0, sizeof(info));
+	info.keysize = sizeof(Oid);
+	info.entrysize = sizeof(RelExtLock);
+	info.num_partitions = NUM_RELEXTLOCK_PARTITIONS;
+
+	RelExtLockHash = ShmemInitHash("RelExtLock Hash",
+								   init_table_size,
+								   max_table_size,
+								   &info,
+								   HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
+}
+
+/*
+ * LockRelationForExtension
+ *
+ * This lock is used to interlock addition of pages to relations.
+ * We need such locking because bufmgr/smgr definition of P_NEW is not
+ * race-condition-proof.
+ *
+ * We assume the caller is already holding some type of regular lock on
+ * the relation, so no AcceptInvalidationMessages call is needed here.
+ */
+void
+LockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+	/* Unconditional acquisition: the result is not examined */
+	(void) RelExtLockAcquire(relation->rd_id, lockmode, false);
+}
+
+/*
+ * ConditionalLockRelationForExtension
+ *
+ * Same as LockRelationForExtension, except that it never blocks: the lock is
+ * taken only if it is immediately available.
+ * Returns TRUE iff the lock was acquired.
+ */
+bool
+ConditionalLockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+	bool		acquired;
+
+	acquired = RelExtLockAcquire(relation->rd_id, lockmode, true);
+
+	return acquired;
+}
+
+/*
+ * RelationExtensionLockWaiterCount
+ *
+ * Report how many processes are waiting for the given relation's extension
+ * lock, as recorded in the nwaiters counter of its hash entry.
+ *
+ * This routine does not take the partition lock itself.  Call it only while
+ * holding the partition lock in exclusive mode, or after having acquired the
+ * relation extension lock on this relation.
+ */
+int
+RelationExtensionLockWaiterCount(Relation relation)
+{
+	Oid			relid = relation->rd_id;
+	uint32		hashcode = RelExtLockTargetTagHashCode(&relid);
+	bool		found;
+	RelExtLock *ext_lock;
+
+	ext_lock = (RelExtLock *) hash_search_with_hash_value(RelExtLockHash,
+														  (void *) &relid,
+														  hashcode,
+														  HASH_FIND, &found);
+
+	/* The entry must exist, because we are expected to hold this lock */
+	Assert(found);
+
+	return pg_atomic_read_u32(&(ext_lock->nwaiters));
+}
+
+/*
+ * UnlockRelationForExtension
+ *
+ * Give up a lock previously taken on the relation in the same mode.
+ */
+void
+UnlockRelationForExtension(Relation relation, RelExtLockMode lockmode)
+{
+	RelExtLockRelease(relation->rd_id, lockmode);
+}
+
+/*
+ * Acquire relation extension lock and create RELEXTLOCK hash entry on shared
+ * hash table.  The partition LWLock taken here is deliberately left held
+ * when the lock is obtained; it is released, together with the extension
+ * lock itself, by RelExtLockRelease (reached via UnlockRelationForExtension
+ * or RelationExtensionLockReleaseAll).
+ */
+static bool
+RelExtLockAcquire(Oid relid, RelExtLockMode lockmode, bool conditional)
+{
+	RelExtLock *ext_lock;
+	LWLock	   *partitionLock;
+	uint32		hashcode;
+	bool		found;
+	bool		got_lock = false;
+	bool		waited = false;
+
+	/*
+	 * relid:		OID of the relation to lock (the hash key)
+	 * lockmode:	RELEXT_EXCLUSIVE or RELEXT_SHARED
+	 * conditional:	if true, give up instead of waiting when the lock is busy
+	 *
+	 * Returns true if the lock was obtained; false is only possible when
+	 * conditional is true.
+	 */
+
+	/*
+	 * held_relextlocks[] has a fixed size; refuse up front rather than
+	 * writing past its end once the lock has been obtained.
+	 */
+	if (num_held_relextlocks >= MAX_SIMUL_EXTLOCKS)
+		elog(ERROR, "too many relation extension locks taken");
+
+	hashcode = RelExtLockTargetTagHashCode(&relid);
+	partitionLock = RelExtLockHashPartitionLock(hashcode);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+	ext_lock = (RelExtLock *) hash_search_with_hash_value(RelExtLockHash,
+														  (void *) &relid,
+														  hashcode, HASH_ENTER, &found);
+
+	/* The table is sized from NLOCKENTS() in InitLocks, hence this hint */
+	if (!ext_lock)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of shared memory"),
+				 errhint("You might need to increase max_locks_per_transaction.")));
+
+	/*
+	 * hash_search only fills in the key of a newly created entry, so the
+	 * lock state, waiter count and condition variable must be set up before
+	 * anyone looks at them.
+	 */
+	if (!found)
+	{
+		pg_atomic_init_u32(&(ext_lock->state), 0);
+		pg_atomic_init_u32(&(ext_lock->nwaiters), 0);
+		ConditionVariableInit(&(ext_lock->cv));
+	}
+
+	for (;;)
+	{
+		if (RelExtLockAttemptLock(ext_lock, lockmode))
+		{
+			got_lock = true;
+
+			/* We are no longer waiting; undo our registration */
+			if (waited)
+				pg_atomic_sub_fetch_u32(&(ext_lock->nwaiters), 1);
+
+			break;				/* got the lock */
+		}
+
+		/* Could not get lock, return if in conditional lock */
+		if (conditional)
+			break;
+
+		/*
+		 * Register as a waiter exactly once, however many times we go
+		 * around this loop, so that the single decrement above balances it.
+		 */
+		if (!waited)
+		{
+			pg_atomic_add_fetch_u32(&(ext_lock->nwaiters), 1);
+			waited = true;
+		}
+		ConditionVariableSleep(&(ext_lock->cv), WAIT_EVENT_RELATION_EXTENSION);
+	}
+
+	ConditionVariableCancelSleep();
+
+	if (got_lock)
+	{
+		/*
+		 * Add lock to list relation extension locks held by this backend.
+		 * The partition lock stays held until RelExtLockRelease.
+		 */
+		held_relextlocks[num_held_relextlocks].relid = relid;
+		held_relextlocks[num_held_relextlocks].lock = ext_lock;
+		held_relextlocks[num_held_relextlocks].mode = lockmode;
+		num_held_relextlocks++;
+	}
+	else
+		LWLockRelease(partitionLock);
+
+	/* Always end up with true if not conditional lock */
+	return got_lock;
+}
+
+/*
+ * RelationExtensionLockReleaseAll - release all currently-held relation extension locks
+ *
+ * Locks are released from the most recently acquired one backwards; each
+ * RelExtLockRelease call shrinks num_held_relextlocks by one.
+ */
+void
+RelationExtensionLockReleaseAll(void)
+{
+	while (num_held_relextlocks > 0)
+	{
+		/*
+		 * NOTE(review): presumably this matches the RESUME_INTERRUPTS done
+		 * by the LWLockRelease call inside RelExtLockRelease, in the manner
+		 * of LWLockReleaseAll -- confirm.
+		 */
+		HOLD_INTERRUPTS();
+
+		RelExtLockRelease(held_relextlocks[num_held_relextlocks - 1].relid,
+						  held_relextlocks[num_held_relextlocks - 1].mode);
+	}
+}
+
+/*
+ * RelExtLockRelease
+ *
+ * Remove
RELEXTLOCK from shared RelExtLockHash hash table. Since other backends
+ * might be holding it or waiting for this lock, we can delete it only if no
+ * backend is interested in it any longer.
+ *
+ * Note that we assume the partition lock for the hash table was acquired, and
+ * left held, when the lock was acquired.  This routine releases the partition
+ * lock as well, after giving up the extension lock.
+ *
+ * relid and lockmode must match a lock recorded in held_relextlocks[];
+ * otherwise an error is raised.
+ */
+static void
+RelExtLockRelease(Oid relid, RelExtLockMode lockmode)
+{
+	RelExtLock *ext_lock;
+	uint32		hashcode;
+	LWLock	   *partitionLock;
+	uint32		newstate;
+	uint32		nwaiters;
+	int			i;
+
+	hashcode = RelExtLockTargetTagHashCode(&relid);
+	partitionLock = RelExtLockHashPartitionLock(hashcode);
+
+	/* Search from the most recently acquired lock backwards */
+	for (i = num_held_relextlocks; --i >= 0;)
+		if (relid == held_relextlocks[i].relid &&
+			lockmode == held_relextlocks[i].mode)
+			break;
+
+	if (i < 0)
+		elog(ERROR, "relation extension lock for %u with lock mode %d is not held",
+			 relid, lockmode);
+
+	ext_lock = held_relextlocks[i].lock;
+
+	num_held_relextlocks--;
+
+	/* Close the gap left by the released entry */
+	for (; i < num_held_relextlocks; i++)
+		held_relextlocks[i] = held_relextlocks[i + 1];
+
+	/* pg_atomic_sub_fetch_u32 returns the value after the subtraction */
+	if (lockmode == RELEXT_EXCLUSIVE)
+		newstate = pg_atomic_sub_fetch_u32(&(ext_lock->state), RELEXT_VAL_EXCLUSIVE);
+	else
+		newstate = pg_atomic_sub_fetch_u32(&(ext_lock->state), RELEXT_VAL_SHARED);
+
+	nwaiters = pg_atomic_read_u32(&(ext_lock->nwaiters));
+
+	/*
+	 * Wake up waiters if there are any.  Otherwise drop the hash entry, but
+	 * only when the state word shows no remaining holder, so that nobody is
+	 * left with a pointer to a recycled entry.
+	 */
+	if (nwaiters > 0)
+		ConditionVariableBroadcast(&(ext_lock->cv));
+	else if ((newstate & RELEXT_LOCKMASK) == 0)
+		hash_search_with_hash_value(RelExtLockHash, (void *) &relid,
+									hashcode, HASH_REMOVE, NULL);
+
+	LWLockRelease(partitionLock);
+}
+
+/*
+ * Internal function that tries to atomically acquire the relation extension
+ * lock in the passed in mode. Return true if we got the lock.
+ */
+static bool
+RelExtLockAttemptLock(RelExtLock *ext_lock, RelExtLockMode lockmode)
+{
+	uint32		oldstate;
+
+	/*
+	 * ext_lock:	shared hash entry whose state word is to be updated
+	 * lockmode:	RELEXT_EXCLUSIVE or RELEXT_SHARED
+	 *
+	 * This function never waits for the lock to become free; it reports
+	 * false instead.
+	 */
+	oldstate = pg_atomic_read_u32(&ext_lock->state);
+
+	while (true)
+	{
+		uint32		desired_state;
+		bool		lock_free;
+
+		desired_state = oldstate;
+
+		if (lockmode == RELEXT_EXCLUSIVE)
+		{
+			/* Exclusive mode needs every bit under RELEXT_LOCKMASK clear */
+			lock_free = (oldstate & RELEXT_LOCKMASK) == 0;
+			if (lock_free)
+				desired_state += RELEXT_VAL_EXCLUSIVE;
+		}
+		else
+		{
+			/* Shared mode only needs the exclusive bit to be clear */
+			lock_free = (oldstate & RELEXT_VAL_EXCLUSIVE) == 0;
+			if (lock_free)
+				desired_state += RELEXT_VAL_SHARED;
+		}
+
+		/*
+		 * When the lock is not free, desired_state equals oldstate, so a
+		 * successful exchange merely confirms that the state did not change
+		 * under us.  NOTE(review): per port/atomics.h a failed exchange is
+		 * expected to store the current value into oldstate, so the loop
+		 * retries with fresh state -- confirm.
+		 */
+		if (pg_atomic_compare_exchange_u32(&ext_lock->state,
+										   &oldstate, desired_state))
+		{
+			if (lock_free)
+				return true;
+			else
+				return false;
+		}
+	}
+	pg_unreachable();
+}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index fe98898..34095cb 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -319,78 +319,6 @@ UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode) } /* - * LockRelationForExtension - * - * This lock tag is used to interlock addition of pages to relations. - * We need such locking because bufmgr/smgr definition of P_NEW is not - * race-condition-proof. - * - * We assume the caller is already holding some type of regular lock on - * the relation, so no AcceptInvalidationMessages call is needed here. - */ -void -LockRelationForExtension(Relation relation, LOCKMODE lockmode) -{ - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - (void) LockAcquire(&tag, lockmode, false, false); -} - -/* - * ConditionalLockRelationForExtension - * - * As above, but only lock if we can get the lock without blocking. - * Returns TRUE iff the lock was acquired.
- */ -bool -ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode) -{ - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL); -} - -/* - * RelationExtensionLockWaiterCount - * - * Count the number of processes waiting for the given relation extension lock. - */ -int -RelationExtensionLockWaiterCount(Relation relation) -{ - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - return LockWaiterCount(&tag); -} - -/* - * UnlockRelationForExtension - */ -void -UnlockRelationForExtension(Relation relation, LOCKMODE lockmode) -{ - LOCKTAG tag; - - SET_LOCKTAG_RELATION_EXTEND(tag, - relation->rd_lockInfo.lockRelId.dbId, - relation->rd_lockInfo.lockRelId.relId); - - LockRelease(&tag, lockmode, false); -} - -/* * LockPage * * Obtain a page-level lock. 
This is currently used by some index access @@ -961,12 +889,6 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field2, tag->locktag_field1); break; - case LOCKTAG_RELATION_EXTEND: - appendStringInfo(buf, - _("extension of relation %u of database %u"), - tag->locktag_field2, - tag->locktag_field1); - break; case LOCKTAG_PAGE: appendStringInfo(buf, _("page %u of relation %u of database %u"), diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 2b26173..bc576a7 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -45,6 +45,7 @@ #include "storage/sinvaladt.h" #include "storage/spin.h" #include "storage/standby.h" +#include "storage/lmgr.h" #include "utils/memutils.h" #include "utils/ps_status.h" #include "utils/resowner_private.h" @@ -388,6 +389,10 @@ InitLocks(void) max_table_size = NLOCKENTS(); init_table_size = max_table_size / 2; + + /* Initialize lock structure for relation extension lock */ + InitRelExtLock(max_table_size); + /* * Allocate hash table for LOCK structs. This stores per-locked-object * information. 
@@ -3366,6 +3371,7 @@ LockShmemSize(void) /* lock hash table */ max_table_size = NLOCKENTS(); size = add_size(size, hash_estimate_size(max_table_size, sizeof(LOCK))); + size = add_size(size, hash_estimate_size(max_table_size, sizeof(LWLock))); /* proclock hash table */ max_table_size *= 2; diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index f1060f9..bc25a53 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -451,6 +451,13 @@ InitializeLWLocks(void) for (id = 0; id < NUM_PREDICATELOCK_PARTITIONS; id++, lock++) LWLockInitialize(&lock->lock, LWTRANCHE_PREDICATE_LOCK_MANAGER); + /* Initialize relation extension lmgr's LWLocks in main array */ + lock = MainLWLockArray + NUM_INDIVIDUAL_LWLOCKS + + NUM_BUFFER_PARTITIONS + NUM_LOCK_PARTITIONS + + NUM_PREDICATELOCK_PARTITIONS; + for (id = 0; id < NUM_RELEXTLOCK_PARTITIONS; id++, lock++) + LWLockInitialize(&lock->lock, LWTRANCHE_RELEXT_LOCK_MANAGER); + /* Initialize named tranches. */ if (NamedLWLockTrancheRequests > 0) { @@ -508,6 +515,7 @@ RegisterLWLockTranches(void) LWLockRegisterTranche(LWTRANCHE_LOCK_MANAGER, "lock_manager"); LWLockRegisterTranche(LWTRANCHE_PREDICATE_LOCK_MANAGER, "predicate_lock_manager"); + LWLockRegisterTranche(LWTRANCHE_RELEXT_LOCK_MANAGER, "relext_lock_manager"); LWLockRegisterTranche(LWTRANCHE_PARALLEL_QUERY_DSA, "parallel_query_dsa"); LWLockRegisterTranche(LWTRANCHE_SESSION_DSA, diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 9e0a8ab..6d8916c 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -25,7 +25,6 @@ /* This must match enum LockTagType! 
*/ const char *const LockTagTypeNames[] = { "relation", - "extend", "page", "tuple", "transactionid", @@ -234,7 +233,6 @@ pg_lock_status(PG_FUNCTION_ARGS) switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: - case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 089b7c3..958822f 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -816,7 +816,8 @@ typedef enum WAIT_EVENT_REPLICATION_ORIGIN_DROP, WAIT_EVENT_REPLICATION_SLOT_DROP, WAIT_EVENT_SAFE_SNAPSHOT, - WAIT_EVENT_SYNC_REP + WAIT_EVENT_SYNC_REP, + WAIT_EVENT_RELATION_EXTENSION } WaitEventIPC; /* ---------- diff --git a/src/include/storage/extension_lock.h b/src/include/storage/extension_lock.h new file mode 100644 index 0000000..f178672 --- /dev/null +++ b/src/include/storage/extension_lock.h @@ -0,0 +1,49 @@ +/*------------------------------------------------------------------------- + * + * extension_lock.h + * Relation extension lock manager + * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/storage/extension_lock.h + * + *------------------------------------------------------------------------- + */ + +#ifndef EXTENSION_H +#define EXTENSION_H + +#ifdef FRONTEND +#error "extension_lock.h may not be included from frontend code" +#endif + +#include "storage/proclist_types.h" +#include "storage/s_lock.h" +#include "storage/condition_variable.h" +#include "port/atomics.h" + +typedef struct RelExtLock +{ + Oid relid; + pg_atomic_uint32 state; + pg_atomic_uint32 nwaiters; + ConditionVariable cv; +} RelExtLock; + +typedef enum RelExtLockMode +{ + RELEXT_EXCLUSIVE, + RELEXT_SHARED +} RelExtLockMode; + +/* Lock a relation for extension */ +extern void InitRelExtLock(long 
max_table_size); +extern void LockRelationForExtension(Relation relation, RelExtLockMode lockmode); +extern void UnlockRelationForExtension(Relation relation, RelExtLockMode lockmode); +extern bool ConditionalLockRelationForExtension(Relation relation, RelExtLockMode lockmode); +extern int RelationExtensionLockWaiterCount(Relation relation); +extern void RelationExtensionLockReleaseAll(void); + +#endif /* EXTENSION_H */ diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 0b92322..ac23354 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -15,6 +15,7 @@ #define LMGR_H #include "lib/stringinfo.h" +#include "storage/extension_lock.h" #include "storage/itemptr.h" #include "storage/lock.h" #include "utils/rel.h" @@ -34,6 +35,36 @@ typedef enum XLTW_Oper XLTW_RecheckExclusionConstr } XLTW_Oper; +typedef struct RELEXTLOCKTAG +{ + Oid relid; /* identifies the lockable object */ + LWLockMode mode; /* lock mode for this table entry */ +} RELEXTLOCKTAG; + +/* + * This structure holds per-object information for a relation extension + * lock. + */ +typedef struct RELEXTLOCK +{ + RELEXTLOCKTAG tag; /* hash key -- must be first */ + LWLock lock; /* LWLock for relation extension */ +} RELEXTLOCK; + +/* + * The LOCALRELEXTLOCK struct represents a local copy of data which is + * also present in the RELEXTLOCK table, organized for fast access without + * needing to acquire an LWLock. It is strictly for optimization. + */ +typedef struct LOCALRELEXTLOCK +{ + /* hash key */ + RELEXTLOCKTAG relid; /* unique identifier of locktable object */ + + /* data */ + bool held; /* is lock held? 
*/ +} LOCALRELEXTLOCK; + extern void RelationInitLockInfo(Relation relation); /* Lock a relation */ @@ -50,13 +81,6 @@ extern bool LockHasWaitersRelation(Relation relation, LOCKMODE lockmode); extern void LockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); -/* Lock a relation for extension */ -extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode); -extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode); -extern bool ConditionalLockRelationForExtension(Relation relation, - LOCKMODE lockmode); -extern int RelationExtensionLockWaiterCount(Relation relation); - /* Lock a page (currently only used within indexes) */ extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 765431e..3be18ea 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -138,8 +138,6 @@ typedef uint16 LOCKMETHODID; typedef enum LockTagType { LOCKTAG_RELATION, /* whole relation */ - /* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */ - LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */ /* same ID info as RELATION */ LOCKTAG_PAGE, /* one page of a relation */ /* ID info for a page is RELATION info + BlockNumber */ @@ -198,14 +196,6 @@ typedef struct LOCKTAG (locktag).locktag_type = LOCKTAG_RELATION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) -#define SET_LOCKTAG_RELATION_EXTEND(locktag,dboid,reloid) \ - ((locktag).locktag_field1 = (dboid), \ - (locktag).locktag_field2 = (reloid), \ - (locktag).locktag_field3 = 0, \ - (locktag).locktag_field4 = 0, \ - (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \ - (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) - #define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \ 
((locktag).locktag_field1 = (dboid), \ (locktag).locktag_field2 = (reloid), \ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index f4c4aed..2e9a1ac 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -120,14 +120,21 @@ extern PGDLLIMPORT int NamedLWLockTrancheRequests; #define LOG2_NUM_PREDICATELOCK_PARTITIONS 4 #define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS) +/* Number of partitions the shared relation extension lock tables are divided into */ +#define LOG2_NUM_RELEXTLOCK_PARTITIONS 4 +#define NUM_RELEXTLOCK_PARTITIONS (1 << LOG2_NUM_RELEXTLOCK_PARTITIONS) + /* Offsets for various chunks of preallocated lwlocks. */ #define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS #define LOCK_MANAGER_LWLOCK_OFFSET \ (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS) #define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \ (LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS) -#define NUM_FIXED_LWLOCKS \ +#define RELEXTLOCK_MANAGER_LWLOCK_OFFSET \ (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS) +#define NUM_FIXED_LWLOCKS \ + (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS + \ + NUM_RELEXTLOCK_PARTITIONS) typedef enum LWLockMode { @@ -151,6 +158,8 @@ extern void LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val); extern void LWLockReleaseAll(void); extern bool LWLockHeldByMe(LWLock *lock); extern bool LWLockHeldByMeInMode(LWLock *lock, LWLockMode mode); +extern bool LWLockCheckForCleanup(LWLock *lock); +extern int LWLockWaiterCount(LWLock *lock); extern bool LWLockWaitForVar(LWLock *lock, uint64 *valptr, uint64 oldval, uint64 *newval); extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value); @@ -211,6 +220,7 @@ typedef enum BuiltinTrancheIds LWTRANCHE_BUFFER_MAPPING, LWTRANCHE_LOCK_MANAGER, LWTRANCHE_PREDICATE_LOCK_MANAGER, + LWTRANCHE_RELEXT_LOCK_MANAGER, LWTRANCHE_PARALLEL_QUERY_DSA, LWTRANCHE_SESSION_DSA, 
LWTRANCHE_SESSION_RECORD_TABLE,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers