Thank you for the review. I've made changes according to your comments.
I'm not attached to the current names in the patch. I've removed all
unnecessary indirections and added cache-line alignment to the LWLock
arrays.

On Tue, 17 Nov 2015 19:36:13 +0100
"and...@anarazel.de" <and...@anarazel.de> wrote:

> On 2015-11-17 14:14:50 +0300, Ildus Kurbangaliev wrote:
> > 1) We can avoid constants, and use a standard steps for tranches
> > creation.
>
> The constants are actually a bit useful, to easily determine
> builtin/non-builtin stuff.

Maybe I'm missing something here, but I don't see much difference from
the other tranches created during Postgres startup. In my opinion they
are also pretty much builtin.

> > 2) We have only one global variable (BufferCtl)
>
> Don't see the advantage. This adds another, albeit predictable,
> indirection in frequent callsites. There's no architectural advantage
> in avoiding these.

> > 3) Tranches moved to shared memory, so we won't need to do
> > an additional work here.
>
> I can see some benefit, but it also doesn't seem like a huge benefit.

Moving the base tranches to shared memory has been discussed many
times. The point is to use them later in pg_stat_activity and other
monitoring views.

> > 4) Also we can kept buffer locks from MainLWLockArray there (that
> > was done in attached patch).
>
> That's the 'blockLocks' thing? That's a pretty bad name. These are
> buffer mapping locks. And this seems like a separate patch, separately
> debated.

Changed to BufMappingPartitionLWLocks. If this patch is all about
separating the buffer manager's LWLocks, why not use one patch for
this task?

> > +	if (!foundCtl)
> >  	{
> > -		int			i;
> > +		/* Align descriptors to a cacheline boundary. */
> > +		ctl->base.bufferDescriptors = (void *) CACHELINEALIGN(ShmemAlloc(
> > +			NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE));
> > +
> > +		ctl->base.bufferBlocks = (char *) ShmemAlloc(NBuffers * (Size) BLCKSZ);
>
> I'm going to entirely veto this.

> > +		strncpy(ctl->IOLWLockTrancheName, "buffer_io",
> > +			BUFMGR_MAX_NAME_LENGTH);
> > +		strncpy(ctl->contentLWLockTrancheName, "buffer_content",
> > +			BUFMGR_MAX_NAME_LENGTH);
> > +		strncpy(ctl->blockLWLockTrancheName, "buffer_blocks",
> > +			BUFMGR_MAX_NAME_LENGTH);
> > +
> > +		ctl->IOLocks = (LWLockMinimallyPadded *) ShmemAlloc(
> > +			sizeof(LWLockMinimallyPadded) * NBuffers);
>
> This should be cacheline aligned.

Fixed.

> > -		buf->io_in_progress_lock = LWLockAssign();
> > -		buf->content_lock = LWLockAssign();
> > +		LWLockInitialize(BufferDescriptorGetContentLock(buf),
> > +			ctl->contentLWLockTrancheId);
> > +		LWLockInitialize(&ctl->IOLocks[i].lock,
> > +			ctl->IOLWLockTrancheId);
>
> Seems weird to use the macro accessing content locks, but not IO
> locks.

Fixed.

> > /* Note: these two macros only work on shared buffers, not local ones! */
> > -#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
> > +#define BufHdrGetBlock(bufHdr)	((Block) (BufferCtl->bufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
>
> That's the additional indirection I'm talking about.

> > @@ -353,9 +353,6 @@ NumLWLocks(void)
> >  	/* Predefined LWLocks */
> >  	numLocks = NUM_FIXED_LWLOCKS;
> >
> > -	/* bufmgr.c needs two for each shared buffer */
> > -	numLocks += 2 * NBuffers;
> > -
> >  	/* proc.c needs one for each backend or auxiliary process */
> >  	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
>
> Didn't you also move the buffer mapping locks... ?

> > diff --git a/src/include/storage/buf_internals.h
> > b/src/include/storage/buf_internals.h
> > index 521ee1c..e1795dc 100644
> > --- a/src/include/storage/buf_internals.h
> > +++ b/src/include/storage/buf_internals.h
> > @@ -95,6 +95,7 @@ typedef struct buftag
> >  	(a).forkNum == (b).forkNum \
> >  )
> >
> > +
> >  /*
>
> unrelated change.

> >  * The shared buffer mapping table is partitioned to reduce contention.
> >  * To determine which partition lock a given tag requires, compute the tag's
> > @@ -104,10 +105,9 @@ typedef struct buftag
> >  #define BufTableHashPartition(hashcode) \
> >  	((hashcode) % NUM_BUFFER_PARTITIONS)
> >  #define BufMappingPartitionLock(hashcode) \
> > -	(&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
> > -		BufTableHashPartition(hashcode)].lock)
> > +	(&((BufferCtlData *) BufferCtl)->blockLocks[BufTableHashPartition(hashcode)].lock)
> >  #define BufMappingPartitionLockByIndex(i) \
> > -	(&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
> > +	(&((BufferCtlData *) BufferCtl)->blockLocks[i].lock)
>
> Again, unnecessary indirections.

Fixed.

> > +/* Maximum length of a bufmgr lock name */
> > +#define BUFMGR_MAX_NAME_LENGTH 32
>
> If we were to do this I'd just use NAMEDATALEN.

Fixed.

> > +/*
> > + * Base control struct for the buffer manager data
> > + * Located in shared memory.
> > + *
> > + * BufferCtl variable points to BufferCtlBase because of only
> > + * the backend code knows about BufferDescPadded, LWLock and
> > + * others structures and the same time it must be usable in
> > + * the frontend code.
> > + */
> > +typedef struct BufferCtlData
> > +{
> > +	BufferCtlBase base;
>
> Eeek. What's the point here?

The point was to use BufferCtlBase in both the backend and the frontend
code; the frontend code doesn't know about LWLock and the other
structures. Anyway, I've removed this code.

-- 
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
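Before the patch itself, a note on the cache-line alignment discussed
above, since the idiom is a bit subtle: each array is over-allocated by
one full cache line and the start pointer is then rounded up. Below is
a minimal standalone sketch of that idiom. CACHE_LINE_SIZE and
CACHELINE_ALIGN are illustrative stand-ins for PG_CACHE_LINE_SIZE and
CACHELINEALIGN, and malloc stands in for ShmemAlloc, so this is a toy
rather than PostgreSQL code:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define CACHE_LINE_SIZE 64		/* stand-in for PG_CACHE_LINE_SIZE */

/* Round a pointer up to the next cache-line boundary. */
#define CACHELINE_ALIGN(p) \
	((void *) (((uintptr_t) (p) + CACHE_LINE_SIZE - 1) & \
			   ~((uintptr_t) (CACHE_LINE_SIZE - 1))))

int
main(void)
{
	size_t		nlocks = 128;	/* e.g. NUM_BUFFER_PARTITIONS */
	size_t		stride = 32;	/* e.g. sizeof(LWLockMinimallyPadded) */

	/*
	 * Over-allocate by one cache line, then align the start.  The extra
	 * bytes guarantee the aligned pointer still has room for all nlocks
	 * elements.
	 */
	char	   *raw = malloc(nlocks * stride + CACHE_LINE_SIZE);
	void	   *aligned = CACHELINE_ALIGN(raw);

	printf("raw = %p, aligned = %p\n", (void *) raw, aligned);
	free(raw);
	return 0;
}

That extra cache line of slack is also why BufferShmemSize() in the
patch adds PG_CACHE_LINE_SIZE once for each aligned LWLock array.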
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848..7ae1491 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -18,8 +18,10 @@
 
 #include "storage/buf_internals.h"
 
-BufferDescPadded *BufferDescriptors;
-char	   *BufferBlocks;
+BufferDescPadded *BufferDescriptors;
+char	   *BufferBlocks;
+LWLockMinimallyPadded *BufferIOLWLocks;
+LWLockMinimallyPadded *BufMappingPartitionLWLocks;
 
 
 /*
@@ -54,6 +56,80 @@ char	   *BufferBlocks;
  * multiple times.  Check the PrivateRefCount infrastructure in bufmgr.c.
  */
 
+static void
+InitBufferLWLockTranches(void)
+{
+	int			i;
+	bool		foundCtl;
+
+	BufferCtlData *ctl = (BufferCtlData *) ShmemInitStruct("BufferCtl",
+									sizeof(BufferCtlData), &foundCtl);
+
+	if (!foundCtl)
+	{
+		BufferIOLWLocks = (LWLockMinimallyPadded *) CACHELINEALIGN(ShmemAlloc(
+			sizeof(LWLockMinimallyPadded) * NBuffers + PG_CACHE_LINE_SIZE));
+		BufMappingPartitionLWLocks = (LWLockMinimallyPadded *) CACHELINEALIGN(
+			ShmemAlloc(sizeof(LWLockMinimallyPadded) * NUM_BUFFER_PARTITIONS +
+				PG_CACHE_LINE_SIZE));
+
+		strncpy(ctl->IOLWLockTrancheName, "buffer_io",
+			NAMEDATALEN);
+		strncpy(ctl->contentLWLockTrancheName, "buffer_content",
+			NAMEDATALEN);
+		strncpy(ctl->partitionLWLockTrancheName, "buffer_partition",
+			NAMEDATALEN);
+
+		/* Initialize tranche fields */
+		ctl->IOLWLockTranche.array_base = BufferIOLWLocks;
+		ctl->IOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
+		ctl->IOLWLockTranche.name = ctl->IOLWLockTrancheName;
+
+		ctl->contentLWLockTranche.array_base =
+			((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
+		ctl->contentLWLockTranche.array_stride = sizeof(BufferDescPadded);
+		ctl->contentLWLockTranche.name = ctl->contentLWLockTrancheName;
+
+		ctl->partitionLWLockTranche.array_base = BufMappingPartitionLWLocks;
+		ctl->partitionLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
+		ctl->partitionLWLockTranche.name = ctl->partitionLWLockTrancheName;
+
+		ctl->partitionLWLockTrancheId = LWLockNewTrancheId();
+		ctl->contentLWLockTrancheId = LWLockNewTrancheId();
+		ctl->IOLWLockTrancheId = LWLockNewTrancheId();
+
+		/* Initialize LWLocks */
+		for (i = 0; i < NBuffers; i++)
+		{
+			BufferDesc *buf = GetBufferDescriptor(i);
+
+			LWLockInitialize(BufferDescriptorGetContentLock(buf),
+							 ctl->contentLWLockTrancheId);
+			LWLockInitialize(BufferDescriptorGetIOLock(buf),
+							 ctl->IOLWLockTrancheId);
+		}
+
+		for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
+			LWLockInitialize(&BufMappingPartitionLWLocks[i].lock,
+							 ctl->partitionLWLockTrancheId);
+	}
+	else
+	{
+		BufferIOLWLocks = (LWLockMinimallyPadded *)
+			(ctl->IOLWLockTranche.array_base);
+		BufMappingPartitionLWLocks = (LWLockMinimallyPadded *)
+			(ctl->partitionLWLockTranche.array_base);
+	}
+
+	/* Register tranches in main tranches array */
+	LWLockRegisterTranche(ctl->IOLWLockTrancheId,
+						  &ctl->IOLWLockTranche);
+	LWLockRegisterTranche(ctl->contentLWLockTrancheId,
+						  &ctl->contentLWLockTranche);
+	LWLockRegisterTranche(ctl->partitionLWLockTrancheId,
+						  &ctl->partitionLWLockTranche);
+}
+
 /*
  * Initialize shared buffer pool
@@ -109,15 +185,14 @@ InitBufferPool(void)
 			 * management of this list is done by freelist.c.
 			 */
 			buf->freeNext = i + 1;
-
-			buf->io_in_progress_lock = LWLockAssign();
-			buf->content_lock = LWLockAssign();
 		}
 
 		/* Correct last entry of linked list */
 		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
 	}
 
+	InitBufferLWLockTranches();
+
 	/* Init other shared buffer-management stuff */
 	StrategyInitialize(!foundDescs);
 }
@@ -144,5 +219,19 @@ BufferShmemSize(void)
 	/* size of stuff controlled by freelist.c */
 	size = add_size(size, StrategyShmemSize());
 
+	/* size of a control struct */
+	size = add_size(size, sizeof(BufferCtlData));
+
+	/* size of IO LWLocks */
+	size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
+	/* to allow aligning IO LWLocks */
+	size = add_size(size, PG_CACHE_LINE_SIZE);
+
+	/* size of buffer partition lwlocks */
+	size = add_size(size, mul_size(NUM_BUFFER_PARTITIONS,
+								   sizeof(LWLockMinimallyPadded)));
+	/* to allow aligning buffer partition LWLocks */
+	size = add_size(size, PG_CACHE_LINE_SIZE);
+
 	return size;
 }
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 63188a3..a3f52ff 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -738,7 +738,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (!isLocalBuf)
 	{
 		if (mode == RBM_ZERO_AND_LOCK)
-			LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
 		else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
 			LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
 	}
@@ -879,7 +879,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
 		!isLocalBuf)
 	{
-		LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+		LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
 	}
 
 	if (isLocalBuf)
@@ -1045,7 +1045,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			 * happens to be trying to split the page the first one got from
 			 * StrategyGetBuffer.)
 			 */
-			if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
+			if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED))
 			{
 				/*
 				 * If using a nondefault strategy, and writing the buffer
@@ -1067,7 +1067,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 					StrategyRejectBuffer(strategy, buf))
 				{
 					/* Drop lock/pin and loop around for another buffer */
-					LWLockRelease(buf->content_lock);
+					LWLockRelease(BufferDescriptorGetContentLock(buf));
 					UnpinBuffer(buf, true);
 					continue;
 				}
@@ -1080,7 +1080,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 											  smgr->smgr_rnode.node.relNode);
 
 				FlushBuffer(buf, NULL);
-				LWLockRelease(buf->content_lock);
+				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
 											   smgr->smgr_rnode.node.spcNode,
@@ -1395,7 +1395,7 @@ MarkBufferDirty(Buffer buffer)
 
 	Assert(BufferIsPinned(buffer));
 	/* unfortunately we can't check if the lock is held exclusively */
-	Assert(LWLockHeldByMe(bufHdr->content_lock));
+	Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
 	LockBufHdr(bufHdr);
 
@@ -1595,8 +1595,8 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner)
 	if (ref->refcount == 0)
 	{
 		/* I'd better not still hold any locks on the buffer */
-		Assert(!LWLockHeldByMe(buf->content_lock));
-		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
+		Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
+		Assert(!LWLockHeldByMe(BufferDescriptorGetIOLock(buf)));
 
 		LockBufHdr(buf);
 
@@ -2116,11 +2116,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 	 * buffer is clean by the time we've locked it.)
 	 */
 	PinBuffer_Locked(bufHdr);
-	LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+	LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 
 	FlushBuffer(bufHdr, NULL);
 
-	LWLockRelease(bufHdr->content_lock);
+	LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 	UnpinBuffer(bufHdr, true);
 
 	return result | BUF_WRITTEN;
@@ -2926,9 +2926,9 @@ FlushRelationBuffers(Relation rel)
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 			FlushBuffer(bufHdr, rel->rd_smgr);
-			LWLockRelease(bufHdr->content_lock);
+			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -2978,9 +2978,9 @@ FlushDatabaseBuffers(Oid dbid)
 			(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
 			FlushBuffer(bufHdr, NULL);
-			LWLockRelease(bufHdr->content_lock);
+			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -3080,7 +3080,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 
 		Assert(GetPrivateRefCount(buffer) > 0);
 		/* here, either share or exclusive lock is OK */
-		Assert(LWLockHeldByMe(bufHdr->content_lock));
+		Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
 
 		/*
 		 * This routine might get called many times on the same page, if we are
@@ -3233,11 +3233,11 @@ LockBuffer(Buffer buffer, int mode)
 	buf = GetBufferDescriptor(buffer - 1);
 
 	if (mode == BUFFER_LOCK_UNLOCK)
-		LWLockRelease(buf->content_lock);
+		LWLockRelease(BufferDescriptorGetContentLock(buf));
 	else if (mode == BUFFER_LOCK_SHARE)
-		LWLockAcquire(buf->content_lock, LW_SHARED);
+		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
 	else if (mode == BUFFER_LOCK_EXCLUSIVE)
-		LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
+		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
 	else
 		elog(ERROR, "unrecognized buffer lock mode: %d", mode);
 }
@@ -3258,7 +3258,7 @@ ConditionalLockBuffer(Buffer buffer)
 
 	buf = GetBufferDescriptor(buffer - 1);
 
-	return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
+	return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
 }
 
 /*
@@ -3468,8 +3468,8 @@ WaitIO(BufferDesc *buf)
 		UnlockBufHdr(buf);
 		if (!(sv_flags & BM_IO_IN_PROGRESS))
 			break;
-		LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_SHARED);
+		LWLockRelease(BufferDescriptorGetIOLock(buf));
 	}
 }
 
@@ -3502,7 +3502,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
 		 * Grab the io_in_progress lock so that other processes can wait for
 		 * me to finish the I/O.
 		 */
-		LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+		LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
 
 		LockBufHdr(buf);
 
@@ -3516,7 +3516,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
 		 * him to get unwedged.
 		 */
 		UnlockBufHdr(buf);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockRelease(BufferDescriptorGetIOLock(buf));
 		WaitIO(buf);
 	}
 
@@ -3526,7 +3526,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
 	{
 		/* someone else already did the I/O */
 		UnlockBufHdr(buf);
-		LWLockRelease(buf->io_in_progress_lock);
+		LWLockRelease(BufferDescriptorGetIOLock(buf));
 		return false;
 	}
 
@@ -3574,7 +3574,7 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, int set_flag_bits)
 
 	InProgressBuf = NULL;
 
-	LWLockRelease(buf->io_in_progress_lock);
+	LWLockRelease(BufferDescriptorGetIOLock(buf));
 }
 
 /*
@@ -3599,7 +3599,7 @@ AbortBufferIO(void)
 		 * we can use TerminateBufferIO.  Anyone who's executing WaitIO on the
 		 * buffer will be in a busy spin until we succeed in doing this.
 		 */
-		LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+		LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
 
 		LockBufHdr(buf);
 		Assert(buf->flags & BM_IO_IN_PROGRESS);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index b13ebc6..aa24d2c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -353,9 +353,6 @@ NumLWLocks(void)
 	/* Predefined LWLocks */
 	numLocks = NUM_FIXED_LWLOCKS;
 
-	/* bufmgr.c needs two for each shared buffer */
-	numLocks += 2 * NBuffers;
-
 	/* proc.c needs one for each backend or auxiliary process */
 	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
 
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 19247c4..cdfed92 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -95,6 +95,9 @@ typedef struct buftag
 	(a).forkNum == (b).forkNum \
 )
 
+/* in buf_init.c */
+extern PGDLLIMPORT LWLockMinimallyPadded *BufMappingPartitionLWLocks;
+
 /*
  * The shared buffer mapping table is partitioned to reduce contention.
  * To determine which partition lock a given tag requires, compute the tag's
@@ -104,10 +107,9 @@ typedef struct buftag
 #define BufTableHashPartition(hashcode) \
 	((hashcode) % NUM_BUFFER_PARTITIONS)
 #define BufMappingPartitionLock(hashcode) \
-	(&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
-		BufTableHashPartition(hashcode)].lock)
+	(&BufMappingPartitionLWLocks[BufTableHashPartition(hashcode)].lock)
 #define BufMappingPartitionLockByIndex(i) \
-	(&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
+	(&BufMappingPartitionLWLocks[i].lock)
 
 /*
  * BufferDesc -- shared descriptor/state data for a single shared buffer.
@@ -138,17 +140,15 @@ typedef struct BufferDesc
 {
 	BufferTag	tag;			/* ID of page contained in buffer */
 	BufFlags	flags;			/* see bit definitions above */
-	uint16		usage_count;	/* usage counter for clock sweep code */
+	uint8		usage_count;	/* usage counter for clock sweep code */
+	slock_t		buf_hdr_lock;	/* protects the above fields */
 	unsigned	refcount;		/* # of backends holding pins on buffer */
 	int			wait_backend_pid;		/* backend PID of pin-count waiter */
 
-	slock_t		buf_hdr_lock;	/* protects the above fields */
-
 	int			buf_id;			/* buffer's index number (from 0) */
 	int			freeNext;		/* link in freelist chain */
 
-	LWLock	   *io_in_progress_lock;	/* to wait for I/O to complete */
-	LWLock	   *content_lock;	/* to lock access to buffer contents */
+	LWLock		content_lock;	/* to lock access to buffer contents */
 } BufferDesc;
 
 /*
@@ -179,10 +179,37 @@ typedef union BufferDescPadded
 	char		pad[BUFFERDESC_PAD_TO_SIZE];
 } BufferDescPadded;
 
+/* Number of partitions of the shared buffer mapping hashtable */
+#define NUM_BUFFER_PARTITIONS  128
+
+/* Control struct for the buffer manager data */
+typedef struct BufferCtlData
+{
+	int			IOLWLockTrancheId;
+	LWLockTranche IOLWLockTranche;
+
+	int			partitionLWLockTrancheId;
+	LWLockTranche partitionLWLockTranche;
+
+	int			contentLWLockTrancheId;
+	LWLockTranche contentLWLockTranche;
+
+	char		IOLWLockTrancheName[NAMEDATALEN];
+	char		partitionLWLockTrancheName[NAMEDATALEN];
+	char		contentLWLockTrancheName[NAMEDATALEN];
+} BufferCtlData;
+
+/* in buf_init.c */
+extern PGDLLIMPORT LWLockMinimallyPadded *BufferIOLWLocks;
+
 #define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
 #define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
 
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
 
+#define BufferDescriptorGetIOLock(bdesc) \
+	(&(BufferIOLWLocks[(bdesc)->buf_id]).lock)
+#define BufferDescriptorGetContentLock(bdesc) \
+	((LWLock *) (&(bdesc)->content_lock))
+
 /*
  * The freeNext field is either the index of the next freelist entry,
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 4653e09..494e14e 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -76,22 +76,28 @@ typedef struct LWLock
  * (Of course, we have to also ensure that the array start address is suitably
  * aligned.)
  *
- * On a 32-bit platforms a LWLock will these days fit into 16 bytes, but since
- * that didn't use to be the case and cramming more lwlocks into a cacheline
- * might be detrimental performancewise we still use 32 byte alignment
- * there. So, both on 32 and 64 bit platforms, it should fit into 32 bytes
- * unless slock_t is really big. We allow for that just in case.
+ * We pad out the main LWLocks so we have one per cache line to minimize
+ * contention. Other tranches, with differing usage patterns, are allocated
+ * differently.
  */
-#define LWLOCK_PADDED_SIZE	(sizeof(LWLock) <= 32 ? 32 : 64)
+#define LWLOCK_PADDED_SIZE	PG_CACHE_LINE_SIZE
+#define LWLOCK_MINIMAL_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
 
 typedef union LWLockPadded
 {
 	LWLock		lock;
 	char		pad[LWLOCK_PADDED_SIZE];
 } LWLockPadded;
+
 extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
 extern char *MainLWLockNames[];
 
+typedef union LWLockMinimallyPadded
+{
+	LWLock		lock;
+	char		pad[LWLOCK_MINIMAL_SIZE];
+} LWLockMinimallyPadded;
+
 /* Names for fixed lwlocks */
 #include "storage/lwlocknames.h"
 
@@ -101,9 +107,6 @@ extern char *MainLWLockNames[];
  * having this file include lock.h or bufmgr.h would be backwards.
  */
 
-/* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS  128
-
 /* Number of partitions the shared lock tables are divided into */
 #define LOG2_NUM_LOCK_PARTITIONS  4
 #define NUM_LOCK_PARTITIONS  (1 << LOG2_NUM_LOCK_PARTITIONS)
@@ -113,9 +116,7 @@ extern char *MainLWLockNames[];
 #define NUM_PREDICATELOCK_PARTITIONS  (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
 
 /* Offsets for various chunks of preallocated lwlocks. */
-#define BUFFER_MAPPING_LWLOCK_OFFSET	NUM_INDIVIDUAL_LWLOCKS
-#define LOCK_MANAGER_LWLOCK_OFFSET		\
-	(BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS)
+#define LOCK_MANAGER_LWLOCK_OFFSET		NUM_INDIVIDUAL_LWLOCKS
 #define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \
 	(LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS)
 #define NUM_FIXED_LWLOCKS \
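One more note for readers of the lwlock.h hunk: the new
LWLockMinimallyPadded union exists so that arrays of such locks have a
fixed, power-of-two stride, which is what each tranche's array_stride
field relies on. The following standalone toy illustrates the idea;
ToyLWLock is a hypothetical stand-in, not the real LWLock layout:

#include <assert.h>
#include <stddef.h>

/* Hypothetical lock struct; the real one lives in lwlock.h. */
typedef struct ToyLWLock
{
	void	   *tranche;
	int			state;
} ToyLWLock;

#define TOY_LWLOCK_MINIMAL_SIZE (sizeof(ToyLWLock) <= 32 ? 32 : 64)

/*
 * The union's pad member forces every element to the same size, so an
 * array of these has a predictable stride regardless of the lock's
 * actual fields.
 */
typedef union ToyLWLockMinimallyPadded
{
	ToyLWLock	lock;
	char		pad[TOY_LWLOCK_MINIMAL_SIZE];
} ToyLWLockMinimallyPadded;

int
main(void)
{
	ToyLWLockMinimallyPadded locks[4];

	/* Adjacent elements are exactly one padded stride apart. */
	assert((char *) &locks[1] - (char *) &locks[0] ==
		   TOY_LWLOCK_MINIMAL_SIZE);
	assert(sizeof(locks) == 4 * TOY_LWLOCK_MINIMAL_SIZE);
	return 0;
}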