On Mon, Mar 14, 2016 at 8:26 AM, Petr Jelinek <[email protected]> wrote:
> Well any value we choose will be very arbitrary. If we look at it from the
> point of maximum absolute disk space we allocate for relation at once,
> the 4MB limit would represent 2.5 orders of magnitude change. That sounds
> like enough for one release cycle, I think we can further tune it if the
> need arises in next one. (with my love for round numbers I would have
> suggested 8MB as that's 3 orders of magnitude, but I am fine with 4MB as
> well)
>
I have modified the patch, this contains the max limit on extra pages,
512(4MB) pages is the max limit.
I have measured the performance also and that looks equally good.
--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 8140418..fcd42b9 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -169,6 +169,94 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
}
/*
+ * RelationAddOneBlock
+ *
+ * Extend relation by one block and lock the buffer
+ */
+static Buffer
+RelationAddOneBlock(Relation relation, Buffer otherBuffer, BulkInsertState bistate)
+{
+ Buffer buffer;
+ /*
+ * XXX This does an lseek - rather expensive - but at the moment it is the
+ * only way to accurately determine how many blocks are in a relation. Is
+ * it worth keeping an accurate file length in shared memory someplace,
+ * rather than relying on the kernel to do it for us?
+ */
+ buffer = ReadBufferBI(relation, P_NEW, bistate);
+
+ /*
+ * We can be certain that locking the otherBuffer first is OK, since
+ * it must have a lower page number.
+ */
+ if (otherBuffer != InvalidBuffer)
+ LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /*
+ * Now acquire lock on the new page.
+ */
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ return buffer;
+}
+
+/*
+ * RelationAddExtraBlocks
+ *
+ * Extend extra blocks for the relations to avoid the future contention
+ * on the relation extension lock.
+ */
+
+static void
+RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
+{
+ Page page;
+ Size freespace;
+ BlockNumber blockNum;
+ int extraBlocks = 0;
+ int lockWaiters = 0;
+ Buffer buffer;
+
+ /*
+ * For calculating number of extra blocks to extend, find the level
+ * of contention on this lock, by getting the requester of this lock
+ * and add extra blocks in multiple of waiters.
+ */
+ lockWaiters = RelationExtensionLockWaiter(relation);
+
+ extraBlocks = lockWaiters * 20;
+
+ /* To avoid the cases when there are huge number of lock waiters, and
+ * extend file size by big amount at a time, put some limit on the
+ * max number of pages to be extended at a time.
+ */
+ if (extraBlocks > 512)
+ extraBlocks = 512;
+
+ while (extraBlocks--)
+ {
+ /*
+ * Here we are adding extra blocks to the relation after
+ * adding each block update the information in FSM so that
+ * other backend running parallel can find the block.
+ */
+ buffer = ReadBufferBI(relation, P_NEW, bistate);
+
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ page = BufferGetPage(buffer);
+ PageInit(page, BufferGetPageSize(buffer), 0);
+ freespace = PageGetHeapFreeSpace(page);
+ MarkBufferDirty(buffer);
+ blockNum = BufferGetBlockNumber(buffer);
+
+ UnlockReleaseBuffer(buffer);
+
+ RecordPageWithFreeSpace(relation, blockNum, freespace);
+ }
+}
+
+/*
* RelationGetBufferForTuple
*
* Returns pinned and exclusive-locked buffer of a page in given relation
@@ -233,10 +321,11 @@ RelationGetBufferForTuple(Relation relation, Size len,
bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
Buffer buffer = InvalidBuffer;
Page page;
- Size pageFreeSpace,
- saveFreeSpace;
+ Size pageFreeSpace = 0,
+ saveFreeSpace = 0;
BlockNumber targetBlock,
- otherBlock;
+ otherBlock,
+ lastValidBlock = InvalidBlockNumber;
bool needLock;
len = MAXALIGN(len); /* be conservative */
@@ -308,6 +397,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
}
}
+loop:
while (targetBlock != InvalidBlockNumber)
{
/*
@@ -388,6 +478,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
otherBlock, targetBlock, vmbuffer_other,
vmbuffer);
+ lastValidBlock = targetBlock;
+
/*
* Now we can check to see if there's enough free space here. If so,
* we're done.
@@ -441,27 +533,47 @@ RelationGetBufferForTuple(Relation relation, Size len,
needLock = !RELATION_IS_LOCAL(relation);
if (needLock)
- LockRelationForExtension(relation, ExclusiveLock);
-
- /*
- * XXX This does an lseek - rather expensive - but at the moment it is the
- * only way to accurately determine how many blocks are in a relation. Is
- * it worth keeping an accurate file length in shared memory someplace,
- * rather than relying on the kernel to do it for us?
- */
- buffer = ReadBufferBI(relation, P_NEW, bistate);
-
- /*
- * We can be certain that locking the otherBuffer first is OK, since it
- * must have a lower page number.
- */
- if (otherBuffer != InvalidBuffer)
- LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE);
+ {
+ /*
+ * First try to get the lock in no-wait mode, if succeed extend one
+ * block, else get the lock in normal mode and after we get the lock
+ * extend some extra blocks, extra blocks will be added to satisfy
+ * request of other waiters and avoid future contention. Here instead
+ * of directly taking lock we try no-wait mode, this is to handle the
+ * case, when there is no contention - it should not find the lock
+ * waiter and execute extra instructions.
+ */
+ if (LOCKACQUIRE_OK
+ != RelationExtensionLockConditional(relation, ExclusiveLock))
+ {
+ LockRelationForExtension(relation, ExclusiveLock);
+
+ if (use_fsm)
+ {
+ if (lastValidBlock != InvalidBlockNumber)
+ {
+ targetBlock = RecordAndGetPageWithFreeSpace(relation,
+ lastValidBlock,
+ pageFreeSpace,
+ len + saveFreeSpace);
+ }
+
+ /* Other waiter has extended the block for us*/
+ if (targetBlock != InvalidBlockNumber)
+ {
+ UnlockRelationForExtension(relation, ExclusiveLock);
+ goto loop;
+ }
+
+ RelationAddExtraBlocks(relation, bistate);
+ }
+ }
+ }
- /*
- * Now acquire lock on the new page.
+ /* For all case we need to add at least one block to satisfy our
+ * own request.
*/
- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+ buffer = RelationAddOneBlock(relation, otherBuffer, bistate);
/*
* Release the file-extension lock; it's now OK for someone else to extend
@@ -472,6 +584,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
if (needLock)
UnlockRelationForExtension(relation, ExclusiveLock);
+
+
/*
* We need to initialize the empty new page. Double-check that it really
* is empty (this should never happen, but if it does we don't want to
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 9d16afb..5d9454d 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -341,6 +341,40 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode)
}
/*
+ * RelationExtensionLockConditional
+ *
+ * Same as LockRelationForExtension except it will not wait on the lock.
+ */
+LockAcquireResult
+RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode)
+{
+ LOCKTAG tag;
+
+ SET_LOCKTAG_RELATION_EXTEND(tag,
+ relation->rd_lockInfo.lockRelId.dbId,
+ relation->rd_lockInfo.lockRelId.relId);
+
+ return LockAcquire(&tag, lockmode, false, true);
+}
+
+/*
+ * RelationExtensionLockWaiter
+ *
+ * Count the lock requester for the RelationExtension lock.
+ */
+int
+RelationExtensionLockWaiter(Relation relation)
+{
+ LOCKTAG tag;
+
+ SET_LOCKTAG_RELATION_EXTEND(tag,
+ relation->rd_lockInfo.lockRelId.dbId,
+ relation->rd_lockInfo.lockRelId.relId);
+
+ return LockWaiterCount(&tag);
+}
+
+/*
* UnlockRelationForExtension
*/
void
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index a458c68..cf723ae 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -4380,3 +4380,47 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
LockRelease(&tag, ShareLock, false);
return true;
}
+
+/*
+ * LockWaiterCount
+ *
+ * Find the number of lock requester on this locktag
+ */
+int
+LockWaiterCount(const LOCKTAG *locktag)
+{
+ LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+ LOCALLOCKTAG localtag;
+ LOCALLOCK *locallock;
+ bool found;
+ uint32 hashcode;
+ LWLock *partitionLock;
+ int waiters = 0;
+
+ if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+ elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+ /*
+ * Find a LOCALLOCK entry for this lock and lockmode
+ */
+ MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */
+ localtag.lock = *locktag;
+ localtag.mode = ExclusiveLock;
+
+ locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
+ (void *) &localtag,
+ HASH_FIND, &found);
+
+
+ hashcode = locallock->hashcode;
+
+ partitionLock = LockHashPartitionLock(hashcode);
+ LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+ if (found)
+ waiters = locallock->lock->nRequested;
+
+ LWLockRelease(partitionLock);
+
+ return waiters;
+}
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index e9d41bf..ea8e19e 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -101,4 +101,7 @@ extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid
/* Describe a locktag for error messages */
extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag);
+extern LockAcquireResult RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode);
+extern int RelationExtensionLockWaiter(Relation relation);
+
#endif /* LMGR_H */
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 788d50a..3fd74fb 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -572,6 +572,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1,
PGPROC *proc2);
extern void InitDeadLockChecking(void);
+extern int LockWaiterCount(const LOCKTAG *locktag);
+
#ifdef LOCK_DEBUG
extern void DumpLocks(PGPROC *proc);
extern void DumpAllLocks(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers