Thank you for the review. I've made changes according to your comments.
I'm not attached to the current names in the patch.
I've removed all unnecessary indirections and added cache-line alignment
to the LWLock arrays.
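
Just to make the alignment explicit: the new arrays follow the usual
pattern of over-allocating by one cache line and rounding the start
address up with CACHELINEALIGN, roughly like this (the variable name
below is only for illustration, not the one used in the patch):

	/*
	 * Sketch of the alignment pattern used for the new LWLock arrays:
	 * over-allocate by PG_CACHE_LINE_SIZE and round the start address
	 * up, so every element begins on a cache line boundary.
	 */
	LWLockMinimallyPadded *locks;

	locks = (LWLockMinimallyPadded *)
		CACHELINEALIGN(ShmemAlloc(sizeof(LWLockMinimallyPadded) * NBuffers +
								  PG_CACHE_LINE_SIZE));

BufferShmemSize() accounts for the extra PG_CACHE_LINE_SIZE per array,
so ShmemAlloc() cannot run past the reserved space.
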
On Tue, 17 Nov 2015 19:36:13 +0100
"[email protected]" <[email protected]> wrote:
> On 2015-11-17 14:14:50 +0300, Ildus Kurbangaliev wrote:
> > 1) We can avoid constants, and use the standard steps for tranche
> > creation.
>
> The constants are actually a bit useful, to easily determine
> builtin/non-builtin stuff.
Maybe I'm missing something here, but I don't see much difference from
the other tranches created during Postgres startup. In my opinion they
are also pretty much builtin.
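
To be concrete, by "standard steps" I mean the sequence any
startup-time tranche goes through. A rough sketch (the array, count and
tranche names here are purely illustrative, not taken from the patch):

	/*
	 * Illustrative sketch of startup-time tranche creation: allocate the
	 * lock array, get a tranche id, describe and register the tranche,
	 * then initialize each lock with that id.  Names are invented here.
	 */
	static LWLockTranche MyTranche;
	LWLockMinimallyPadded *my_lock_array;
	int			num_locks = NBuffers;	/* illustrative count */
	int			tranche_id;
	int			i;

	my_lock_array = (LWLockMinimallyPadded *)
		ShmemAlloc(sizeof(LWLockMinimallyPadded) * num_locks);

	tranche_id = LWLockNewTrancheId();
	MyTranche.name = "my_tranche";
	MyTranche.array_base = my_lock_array;
	MyTranche.array_stride = sizeof(LWLockMinimallyPadded);
	LWLockRegisterTranche(tranche_id, &MyTranche);

	for (i = 0; i < num_locks; i++)
		LWLockInitialize(&my_lock_array[i].lock, tranche_id);

In the patch the tranche structs and names live in the BufferCtl shmem
struct instead, so every backend can re-register them after attaching.
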
>
> > 2) We have only one global variable (BufferCtl)
>
> Don't see the advantage. This adds another, albeit predictable,
> indirection in frequent callsites. There's no architectural advantage
> in avoiding these.
>
> > 3) Tranches moved to shared memory, so we won't need to do
> > any additional work here.
>
> I can see some benefit, but it also doesn't seem like a huge benefit.
Moving the base tranches to shared memory has been discussed many times.
The point is to use them later in pg_stat_activity and other monitoring
views.
>
>
> > 4) Also we can keep buffer locks from MainLWLockArray there (that
> > was done in the attached patch).
>
> That's the 'blockLocks' thing? That's a pretty bad name. These are
> buffer mapping locks. And this seems like a separate patch, separately
> debated.
Changed to BufMappingPartitionLWLocks. Since this patch is all about
separating the buffer manager's LWLocks, why not handle that in a
single patch?
>
> > + if (!foundCtl)
> > {
> > - int i;
> > + /* Align descriptors to a cacheline boundary. */
> > +		ctl->base.bufferDescriptors = (void *) CACHELINEALIGN(ShmemAlloc(
> > +			NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE));
> > +
> > +		ctl->base.bufferBlocks = (char *) ShmemAlloc(NBuffers * (Size) BLCKSZ);
>
> I'm going to entirely veto this.
>
> > + strncpy(ctl->IOLWLockTrancheName, "buffer_io",
> > + BUFMGR_MAX_NAME_LENGTH);
> > +		strncpy(ctl->contentLWLockTrancheName, "buffer_content",
> > +			BUFMGR_MAX_NAME_LENGTH);
> > +		strncpy(ctl->blockLWLockTrancheName, "buffer_blocks",
> > +			BUFMGR_MAX_NAME_LENGTH);
> > +
> > +		ctl->IOLocks = (LWLockMinimallyPadded *) ShmemAlloc(
> > +			sizeof(LWLockMinimallyPadded) * NBuffers);
>
> This should be cacheline aligned.
Fixed
>
> > - buf->io_in_progress_lock = LWLockAssign();
> > - buf->content_lock = LWLockAssign();
> > +
> > +		LWLockInitialize(BufferDescriptorGetContentLock(buf),
> > +			ctl->contentLWLockTrancheId);
> > + LWLockInitialize(&ctl->IOLocks[i].lock,
> > + ctl->IOLWLockTrancheId);
>
> Seems weird to use the macro accessing content locks, but not IO
> locks.
Fixed
>
> >  /* Note: these two macros only work on shared buffers, not local ones! */
> > -#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
> > +#define BufHdrGetBlock(bufHdr)	((Block) (BufferCtl->bufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
>
> That's the additional indirection I'm talking about.
>
> > @@ -353,9 +353,6 @@ NumLWLocks(void)
> > /* Predefined LWLocks */
> > numLocks = NUM_FIXED_LWLOCKS;
> >
> > - /* bufmgr.c needs two for each shared buffer */
> > - numLocks += 2 * NBuffers;
> > -
> > 	/* proc.c needs one for each backend or auxiliary process */
> > 	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
>
> Didn't you also move the buffer mapping locks... ?
>
> > diff --git a/src/include/storage/buf_internals.h
> > b/src/include/storage/buf_internals.h index 521ee1c..e1795dc 100644
> > --- a/src/include/storage/buf_internals.h
> > +++ b/src/include/storage/buf_internals.h
> > @@ -95,6 +95,7 @@ typedef struct buftag
> > (a).forkNum == (b).forkNum \
> > )
> >
> > +
> > /*
>
> unrelated change.
>
> >   * The shared buffer mapping table is partitioned to reduce contention.
> >   * To determine which partition lock a given tag requires, compute the tag's
> > @@ -104,10 +105,9 @@ typedef struct buftag
> > #define BufTableHashPartition(hashcode) \
> > ((hashcode) % NUM_BUFFER_PARTITIONS)
> > #define BufMappingPartitionLock(hashcode) \
> > - (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
> > - BufTableHashPartition(hashcode)].lock)
> > +	(&((BufferCtlData *)BufferCtl)->blockLocks[BufTableHashPartition(hashcode)].lock)
> > #define BufMappingPartitionLockByIndex(i) \
> > - (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
> > + (&((BufferCtlData *)BufferCtl)->blockLocks[i].lock)
>
> Again, unnecessary indirections.
Fixed
>
> > +/* Maximum length of a bufmgr lock name */
> > +#define BUFMGR_MAX_NAME_LENGTH 32
>
> If we were to do this I'd just use NAMEDATALEN.
Fixed
>
> > +/*
> > + * Base control struct for the buffer manager data
> > + * Located in shared memory.
> > + *
> > + * The BufferCtl variable points to BufferCtlBase because only
> > + * the backend code knows about BufferDescPadded, LWLock and
> > + * other structures, while at the same time it must be usable in
> > + * the frontend code.
> > + */
> > +typedef struct BufferCtlData
> > +{
> > + BufferCtlBase base;
>
> Eeek. What's the point here?
The point was to use BufferCtlBase in both backend and frontend code;
the frontend code doesn't know about LWLock and the other structures.
Anyway, I've removed this code.
--
Ildus Kurbangaliev
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848..7ae1491 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -18,8 +18,10 @@
#include "storage/buf_internals.h"
-BufferDescPadded *BufferDescriptors;
-char *BufferBlocks;
+BufferDescPadded *BufferDescriptors;
+char *BufferBlocks;
+LWLockMinimallyPadded *BufferIOLWLocks;
+LWLockMinimallyPadded *BufMappingPartitionLWLocks;
/*
@@ -54,6 +56,80 @@ char *BufferBlocks;
* multiple times. Check the PrivateRefCount infrastructure in bufmgr.c.
*/
+static void
+InitBufferLWLockTranches(void)
+{
+ int i;
+ bool foundCtl;
+
+ BufferCtlData *ctl = (BufferCtlData *) ShmemInitStruct("BufferCtl",
+ sizeof(BufferCtlData), &foundCtl);
+
+ if (!foundCtl)
+ {
+ BufferIOLWLocks = (LWLockMinimallyPadded *) CACHELINEALIGN(ShmemAlloc(
+ sizeof(LWLockMinimallyPadded) * NBuffers + PG_CACHE_LINE_SIZE));
+ BufMappingPartitionLWLocks = (LWLockMinimallyPadded *) CACHELINEALIGN(
+ ShmemAlloc(sizeof(LWLockMinimallyPadded) * NUM_BUFFER_PARTITIONS +
+ PG_CACHE_LINE_SIZE));
+
+ strncpy(ctl->IOLWLockTrancheName, "buffer_io",
+ NAMEDATALEN);
+ strncpy(ctl->contentLWLockTrancheName, "buffer_content",
+ NAMEDATALEN);
+ strncpy(ctl->partitionLWLockTrancheName, "buffer_partition",
+ NAMEDATALEN);
+
+ /* Initialize tranche fields */
+ ctl->IOLWLockTranche.array_base = BufferIOLWLocks;
+ ctl->IOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
+ ctl->IOLWLockTranche.name = ctl->IOLWLockTrancheName;
+
+ ctl->contentLWLockTranche.array_base =
+ ((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
+ ctl->contentLWLockTranche.array_stride = sizeof(BufferDescPadded);
+ ctl->contentLWLockTranche.name = ctl->contentLWLockTrancheName;
+
+ ctl->partitionLWLockTranche.array_base = BufMappingPartitionLWLocks;
+ ctl->partitionLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
+ ctl->partitionLWLockTranche.name = ctl->partitionLWLockTrancheName;
+
+ ctl->partitionLWLockTrancheId = LWLockNewTrancheId();
+ ctl->contentLWLockTrancheId = LWLockNewTrancheId();
+ ctl->IOLWLockTrancheId = LWLockNewTrancheId();
+
+ /* Initialize LWLocks */
+ for (i = 0; i < NBuffers; i++)
+ {
+ BufferDesc *buf = GetBufferDescriptor(i);
+
+ LWLockInitialize(BufferDescriptorGetContentLock(buf),
+ ctl->contentLWLockTrancheId);
+ LWLockInitialize(BufferDescriptorGetIOLock(buf),
+ ctl->IOLWLockTrancheId);
+ }
+
+ for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
+ LWLockInitialize(&BufMappingPartitionLWLocks[i].lock,
+ ctl->partitionLWLockTrancheId);
+ }
+ else
+ {
+ BufferIOLWLocks = (LWLockMinimallyPadded *)
+ (ctl->IOLWLockTranche.array_base);
+ BufMappingPartitionLWLocks = (LWLockMinimallyPadded *)
+ (ctl->partitionLWLockTranche.array_base);
+ }
+
+ /* Register tranches in main tranches array */
+ LWLockRegisterTranche(ctl->IOLWLockTrancheId,
+ &ctl->IOLWLockTranche);
+ LWLockRegisterTranche(ctl->contentLWLockTrancheId,
+ &ctl->contentLWLockTranche);
+ LWLockRegisterTranche(ctl->partitionLWLockTrancheId,
+ &ctl->partitionLWLockTranche);
+}
+
/*
* Initialize shared buffer pool
@@ -109,15 +185,14 @@ InitBufferPool(void)
* management of this list is done by freelist.c.
*/
buf->freeNext = i + 1;
-
- buf->io_in_progress_lock = LWLockAssign();
- buf->content_lock = LWLockAssign();
}
/* Correct last entry of linked list */
GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
}
+ InitBufferLWLockTranches();
+
/* Init other shared buffer-management stuff */
StrategyInitialize(!foundDescs);
}
@@ -144,5 +219,19 @@ BufferShmemSize(void)
/* size of stuff controlled by freelist.c */
size = add_size(size, StrategyShmemSize());
+ /* size of a control struct */
+ size = add_size(size, sizeof(BufferCtlData));
+
+ /* size of IO LWLocks */
+ size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
+ /* to allow aligning IO LWLocks */
+ size = add_size(size, PG_CACHE_LINE_SIZE);
+
+ /* size of buffer partition lwlocks */
+ size = add_size(size, mul_size(NUM_BUFFER_PARTITIONS,
+ sizeof(LWLockMinimallyPadded)));
+ /* to allow aligning buffer partition LWLocks */
+ size = add_size(size, PG_CACHE_LINE_SIZE);
+
return size;
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 63188a3..a3f52ff 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -738,7 +738,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (!isLocalBuf)
{
if (mode == RBM_ZERO_AND_LOCK)
- LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
}
@@ -879,7 +879,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
!isLocalBuf)
{
- LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
}
if (isLocalBuf)
@@ -1045,7 +1045,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* happens to be trying to split the page the first one got from
* StrategyGetBuffer.)
*/
- if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
+ if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED))
{
/*
* If using a nondefault strategy, and writing the buffer
@@ -1067,7 +1067,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
StrategyRejectBuffer(strategy, buf))
{
/* Drop lock/pin and loop around for another buffer */
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
UnpinBuffer(buf, true);
continue;
}
@@ -1080,7 +1080,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rnode.node.relNode);
FlushBuffer(buf, NULL);
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
smgr->smgr_rnode.node.spcNode,
@@ -1395,7 +1395,7 @@ MarkBufferDirty(Buffer buffer)
Assert(BufferIsPinned(buffer));
/* unfortunately we can't check if the lock is held exclusively */
- Assert(LWLockHeldByMe(bufHdr->content_lock));
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
LockBufHdr(bufHdr);
@@ -1595,8 +1595,8 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner)
if (ref->refcount == 0)
{
/* I'd better not still hold any locks on the buffer */
- Assert(!LWLockHeldByMe(buf->content_lock));
- Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
+ Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
+ Assert(!LWLockHeldByMe(BufferDescriptorGetIOLock(buf)));
LockBufHdr(buf);
@@ -2116,11 +2116,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
* buffer is clean by the time we've locked it.)
*/
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, NULL);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
return result | BUF_WRITTEN;
@@ -2926,9 +2926,9 @@ FlushRelationBuffers(Relation rel)
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, rel->rd_smgr);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
}
else
@@ -2978,9 +2978,9 @@ FlushDatabaseBuffers(Oid dbid)
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, NULL);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
}
else
@@ -3080,7 +3080,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Assert(GetPrivateRefCount(buffer) > 0);
/* here, either share or exclusive lock is OK */
- Assert(LWLockHeldByMe(bufHdr->content_lock));
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
/*
* This routine might get called many times on the same page, if we are
@@ -3233,11 +3233,11 @@ LockBuffer(Buffer buffer, int mode)
buf = GetBufferDescriptor(buffer - 1);
if (mode == BUFFER_LOCK_UNLOCK)
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
else if (mode == BUFFER_LOCK_SHARE)
- LWLockAcquire(buf->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
else if (mode == BUFFER_LOCK_EXCLUSIVE)
- LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
else
elog(ERROR, "unrecognized buffer lock mode: %d", mode);
}
@@ -3258,7 +3258,7 @@ ConditionalLockBuffer(Buffer buffer)
buf = GetBufferDescriptor(buffer - 1);
- return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
+ return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
}
/*
@@ -3468,8 +3468,8 @@ WaitIO(BufferDesc *buf)
UnlockBufHdr(buf);
if (!(sv_flags & BM_IO_IN_PROGRESS))
break;
- LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_SHARED);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
}
}
@@ -3502,7 +3502,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* Grab the io_in_progress lock so that other processes can wait for
* me to finish the I/O.
*/
- LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
LockBufHdr(buf);
@@ -3516,7 +3516,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* him to get unwedged.
*/
UnlockBufHdr(buf);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
WaitIO(buf);
}
@@ -3526,7 +3526,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
{
/* someone else already did the I/O */
UnlockBufHdr(buf);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
return false;
}
@@ -3574,7 +3574,7 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, int set_flag_bits)
InProgressBuf = NULL;
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
}
/*
@@ -3599,7 +3599,7 @@ AbortBufferIO(void)
* we can use TerminateBufferIO. Anyone who's executing WaitIO on the
* buffer will be in a busy spin until we succeed in doing this.
*/
- LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
LockBufHdr(buf);
Assert(buf->flags & BM_IO_IN_PROGRESS);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index b13ebc6..aa24d2c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -353,9 +353,6 @@ NumLWLocks(void)
/* Predefined LWLocks */
numLocks = NUM_FIXED_LWLOCKS;
- /* bufmgr.c needs two for each shared buffer */
- numLocks += 2 * NBuffers;
-
/* proc.c needs one for each backend or auxiliary process */
numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 19247c4..cdfed92 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -95,6 +95,9 @@ typedef struct buftag
(a).forkNum == (b).forkNum \
)
+/* in buf_init.c */
+extern PGDLLIMPORT LWLockMinimallyPadded *BufMappingPartitionLWLocks;
+
/*
* The shared buffer mapping table is partitioned to reduce contention.
* To determine which partition lock a given tag requires, compute the tag's
@@ -104,10 +107,9 @@ typedef struct buftag
#define BufTableHashPartition(hashcode) \
((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
- (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
- BufTableHashPartition(hashcode)].lock)
+ (&BufMappingPartitionLWLocks[BufTableHashPartition(hashcode)].lock)
#define BufMappingPartitionLockByIndex(i) \
- (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
+ (&BufMappingPartitionLWLocks[i].lock)
/*
* BufferDesc -- shared descriptor/state data for a single shared buffer.
@@ -138,17 +140,15 @@ typedef struct BufferDesc
{
BufferTag tag; /* ID of page contained in buffer */
BufFlags flags; /* see bit definitions above */
- uint16 usage_count; /* usage counter for clock sweep code */
+ uint8 usage_count; /* usage counter for clock sweep code */
+ slock_t buf_hdr_lock; /* protects the above fields */
unsigned refcount; /* # of backends holding pins on buffer */
int wait_backend_pid; /* backend PID of pin-count waiter */
- slock_t buf_hdr_lock; /* protects the above fields */
-
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
- LWLock *io_in_progress_lock; /* to wait for I/O to complete */
- LWLock *content_lock; /* to lock access to buffer contents */
+ LWLock content_lock; /* to lock access to buffer contents */
} BufferDesc;
/*
@@ -179,10 +179,37 @@ typedef union BufferDescPadded
char pad[BUFFERDESC_PAD_TO_SIZE];
} BufferDescPadded;
+/* Number of partitions of the shared buffer mapping hashtable */
+#define NUM_BUFFER_PARTITIONS 128
+
+/* Control struct for the buffer manager data */
+typedef struct BufferCtlData
+{
+ int IOLWLockTrancheId;
+ LWLockTranche IOLWLockTranche;
+
+ int partitionLWLockTrancheId;
+ LWLockTranche partitionLWLockTranche;
+
+ int contentLWLockTrancheId;
+ LWLockTranche contentLWLockTranche;
+
+ char IOLWLockTrancheName[NAMEDATALEN];
+ char partitionLWLockTrancheName[NAMEDATALEN];
+ char contentLWLockTrancheName[NAMEDATALEN];
+} BufferCtlData;
+
+/* in buf_init.c */
+extern PGDLLIMPORT LWLockMinimallyPadded *BufferIOLWLocks;
+
#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
+#define BufferDescriptorGetIOLock(bdesc) \
+ (&(BufferIOLWLocks[(bdesc)->buf_id]).lock)
+#define BufferDescriptorGetContentLock(bdesc) \
+ ((LWLock*) (&(bdesc)->content_lock))
/*
* The freeNext field is either the index of the next freelist entry,
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 4653e09..494e14e 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -76,22 +76,28 @@ typedef struct LWLock
* (Of course, we have to also ensure that the array start address is suitably
* aligned.)
*
- * On a 32-bit platforms a LWLock will these days fit into 16 bytes, but since
- * that didn't use to be the case and cramming more lwlocks into a cacheline
- * might be detrimental performancewise we still use 32 byte alignment
- * there. So, both on 32 and 64 bit platforms, it should fit into 32 bytes
- * unless slock_t is really big. We allow for that just in case.
+ * We pad out the main LWLocks so we have one per cache line to minimize
+ * contention. Other tranches, with differing usage patterns, are allocated
+ * differently.
*/
-#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
+#define LWLOCK_PADDED_SIZE PG_CACHE_LINE_SIZE
+#define LWLOCK_MINIMAL_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
typedef union LWLockPadded
{
LWLock lock;
char pad[LWLOCK_PADDED_SIZE];
} LWLockPadded;
+
extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
extern char *MainLWLockNames[];
+typedef union LWLockMinimallyPadded
+{
+ LWLock lock;
+ char pad[LWLOCK_MINIMAL_SIZE];
+} LWLockMinimallyPadded;
+
/* Names for fixed lwlocks */
#include "storage/lwlocknames.h"
@@ -101,9 +107,6 @@ extern char *MainLWLockNames[];
* having this file include lock.h or bufmgr.h would be backwards.
*/
-/* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS 128
-
/* Number of partitions the shared lock tables are divided into */
#define LOG2_NUM_LOCK_PARTITIONS 4
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
@@ -113,9 +116,7 @@ extern char *MainLWLockNames[];
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
/* Offsets for various chunks of preallocated lwlocks. */
-#define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS
-#define LOCK_MANAGER_LWLOCK_OFFSET \
- (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS)
+#define LOCK_MANAGER_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS
#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \
(LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS)
#define NUM_FIXED_LWLOCKS \
--