On 2017-07-31 18:56, Sokolov Yura wrote:
Good day, every one.In attempt to improve performance of YCSB on zipfian distribution, it were found that significant time is spent in XidInMVCCSnapshot in scanning snapshot->xip array. While overall CPU time is not too noticable, it has measurable impact on scaleability. First I tried to sort snapshot->xip in GetSnapshotData, and search in a sorted array. But since snapshot->xip is not touched if no transaction contention occurs, sorting xip always is not best option. Then I sorted xip array on demand in XidInMVCCSnapshot only if search in snapshot->xip occurs (ie lazy sorting). It performs much better, but since it is O(NlogN), sort's execution time is noticable for large number of clients. Third approach (present in attached patch) is making hash table lazily on first search in xip array. Note: hash table is not built if number of "in-progress" xids is less than 60. Tests shows, there is no big benefit from doing so (at least on Intel Xeon). With regards,
Here is new, more correct, patch version, and results for pgbench with zipfian distribution (50% read 50% write) (same to Alik's tests athttps://www.postgresql.org/message-id/bf3b6f54-68c3-417a-bfab-fb4d66f2b...@postgrespro.ru )
clients | master | hashsnap2 | hashsnap2_lwlock --------+--------+-----------+------------------ 10 | 203384 | 203813 | 204852 20 | 334344 | 334268 | 363510 40 | 228496 | 231777 | 383820 70 | 146892 | 148173 | 221326 110 | 99741 | 111580 | 157327 160 | 65257 | 81230 | 112028 230 | 38344 | 56790 | 77514 310 | 22355 | 39249 | 55907 400 | 13402 | 26899 | 39742 500 | 8382 | 17855 | 28362 650 | 5313 | 11450 | 17497 800 | 3352 | 7816 | 11030 (Note: I'm not quite sure, why my numbers for master are lower than Alik's one. Probably, our test methodology differs a bit. Perhaps, it is because I don't recreate tables between client number change, but between branch switch). With regards -- Sokolov Yura aka funny_falcon Postgres Professional: https://postgrespro.ru The Russian Postgres Company
From 80787aee44b7f9b5389476e45f2241073b6c89ee Mon Sep 17 00:00:00 2001 From: Sokolov Yura <funny.fal...@postgrespro.ru> Date: Mon, 10 Jul 2017 12:34:48 +0000 Subject: [PATCH] Make hash table for xip in XidInMVCCSnapshot When lot of concurrent transactions attempts to update single row, then linear scan through running list in XidInMVCCSnapshot became noticebale bottleneck. With this change, hash table is built on first search of xid in snapshot->xip and snapshot->subxip arrays. If size of array is smaller than 60, then linear scan is still used, cause there is no much benefits from building hash then. (at least on Intel Xeon). --- src/backend/storage/ipc/procarray.c | 64 +++++++++++++++++++++++------ src/backend/utils/time/snapmgr.c | 24 +++++++---- src/backend/utils/time/tqual.c | 81 +++++++++++++++++++++++++++---------- src/include/utils/snapshot.h | 11 +++++ 4 files changed, 138 insertions(+), 42 deletions(-) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index a7e8cf2..18cc233 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -1469,6 +1469,49 @@ GetMaxSnapshotSubxidCount(void) } /* + * ExtendXipSizeForHash - calculate xip array size with space for hash table + */ +int +ExtendXipSizeForHash(int xipcnt, uint8* plog) +{ + int size; + uint8 log = 0; + + size = xipcnt; + if (xipcnt >= SnapshotMinHash) + { + log = 1; + while (xipcnt) { + log++; + xipcnt >>= 1; + } + size += 1<<log; + } + *plog = log; + return size; +} + +/* + * AllocateXip - allocate xip array, extended for hash part if needed. + * + * Hash part will be used in tqual.c in XidInMVCCSnapshot (XidInXip). + */ +static TransactionId * +AllocateXip(int max, uint8* plog) +{ + int size; + TransactionId *xip; + + size = ExtendXipSizeForHash(max, plog); + xip = (TransactionId *) malloc(size * sizeof(TransactionId)); + if (xip == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + return xip; +} + +/* * GetSnapshotData -- returns information about running transactions. * * The returned snapshot includes xmin (lowest still-running xact ID), @@ -1537,19 +1580,10 @@ GetSnapshotData(Snapshot snapshot) * First call for this snapshot. Snapshot is same size whether or not * we are in recovery, see later comments. */ - snapshot->xip = (TransactionId *) - malloc(GetMaxSnapshotXidCount() * sizeof(TransactionId)); - if (snapshot->xip == NULL) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - Assert(snapshot->subxip == NULL); - snapshot->subxip = (TransactionId *) - malloc(GetMaxSnapshotSubxidCount() * sizeof(TransactionId)); - if (snapshot->subxip == NULL) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); + snapshot->xip = AllocateXip(GetMaxSnapshotXidCount(), + &snapshot->xhlog); + snapshot->subxip = AllocateXip(GetMaxSnapshotSubxidCount(), + &snapshot->subxhlog); } /* @@ -1757,6 +1791,10 @@ GetSnapshotData(Snapshot snapshot) snapshot->active_count = 0; snapshot->regd_count = 0; snapshot->copied = false; + if (snapshot->xhlog != 0) + snapshot->xhlog |= SnapshotHashNotBuilt; + if (snapshot->subxhlog != 0) + snapshot->subxhlog |= SnapshotHashNotBuilt; if (old_snapshot_threshold < 0) { diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 08a08c8..56c1583 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -662,13 +662,16 @@ CopySnapshot(Snapshot snapshot) Snapshot newsnap; Size subxipoff; Size size; + int xcnt, subxcnt; + uint8 xhlog, subxhlog; Assert(snapshot != InvalidSnapshot); + xcnt = ExtendXipSizeForHash(snapshot->xcnt, &xhlog); + subxcnt = ExtendXipSizeForHash(snapshot->subxcnt, &subxhlog); /* We allocate any XID arrays needed in the same palloc block. */ - size = subxipoff = sizeof(SnapshotData) + - snapshot->xcnt * sizeof(TransactionId); - if (snapshot->subxcnt > 0) + size = subxipoff = sizeof(SnapshotData) + xcnt * sizeof(TransactionId); + if (subxcnt > 0) size += snapshot->subxcnt * sizeof(TransactionId); newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); @@ -677,6 +680,8 @@ CopySnapshot(Snapshot snapshot) newsnap->regd_count = 0; newsnap->active_count = 0; newsnap->copied = true; + newsnap->xhlog = xhlog ? xhlog | SnapshotHashNotBuilt : 0; + newsnap->subxhlog = subxhlog ? subxhlog | SnapshotHashNotBuilt : 0; /* setup XID array */ if (snapshot->xcnt > 0) @@ -2124,16 +2129,18 @@ RestoreSnapshot(char *start_address) Size size; Snapshot snapshot; TransactionId *serialized_xids; + int xcnt, subxcnt; + uint8 xhlog, subxhlog; memcpy(&serialized_snapshot, start_address, sizeof(SerializedSnapshotData)); serialized_xids = (TransactionId *) (start_address + sizeof(SerializedSnapshotData)); + xcnt = ExtendXipSizeForHash(serialized_snapshot.xcnt, &xhlog); + subxcnt = ExtendXipSizeForHash(serialized_snapshot.subxcnt, &subxhlog); /* We allocate any XID arrays needed in the same palloc block. */ - size = sizeof(SnapshotData) - + serialized_snapshot.xcnt * sizeof(TransactionId) - + serialized_snapshot.subxcnt * sizeof(TransactionId); + size = sizeof(SnapshotData) + (xcnt + subxcnt) * sizeof(TransactionId); /* Copy all required fields */ snapshot = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); @@ -2142,8 +2149,10 @@ RestoreSnapshot(char *start_address) snapshot->xmax = serialized_snapshot.xmax; snapshot->xip = NULL; snapshot->xcnt = serialized_snapshot.xcnt; + snapshot->xhlog = xhlog ? xhlog | SnapshotHashNotBuilt : 0; snapshot->subxip = NULL; snapshot->subxcnt = serialized_snapshot.subxcnt; + snapshot->subxhlog = subxhlog ? subxhlog | SnapshotHashNotBuilt : 0; snapshot->suboverflowed = serialized_snapshot.suboverflowed; snapshot->takenDuringRecovery = serialized_snapshot.takenDuringRecovery; snapshot->curcid = serialized_snapshot.curcid; @@ -2161,8 +2170,7 @@ RestoreSnapshot(char *start_address) /* Copy SubXIDs, if present. */ if (serialized_snapshot.subxcnt > 0) { - snapshot->subxip = ((TransactionId *) (snapshot + 1)) + - serialized_snapshot.xcnt; + snapshot->subxip = ((TransactionId *) (snapshot + 1)) + xcnt; memcpy(snapshot->subxip, serialized_xids + serialized_snapshot.xcnt, serialized_snapshot.subxcnt * sizeof(TransactionId)); } diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c index f9da9e1..1c53790 100644 --- a/src/backend/utils/time/tqual.c +++ b/src/backend/utils/time/tqual.c @@ -1450,6 +1450,59 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin); } +static bool +XidInXip(TransactionId xid, TransactionId *xip, uint32 xcnt, uint8 *xhlog) +{ + uint32 i; + uint32 j, k, d, mask; + TransactionId *xiphash; + if (!TransactionIdIsNormal(xid)) + return false; + if (xcnt < SnapshotMinHash || *xhlog == 0) + { + /* full scan for small snapshots and if xiphash is not allocated */ + for (i = 0; i < xcnt; i++) + if (TransactionIdEquals(xid, xip[i])) + return true; + return false; + } + mask = ((uint32)1 << (*xhlog & ~SnapshotHashNotBuilt)) - 1; + xiphash = xip + xcnt; + if (!(*xhlog & SnapshotHashNotBuilt)) + { + d = xid >> *xhlog; + j = k = xid & mask; + while (xiphash[j] != InvalidTransactionId) + { + if (TransactionIdEquals(xiphash[j], xid)) + return true; + j = (k + d) & mask; + d = d*5 + 1; + } + return false; + } + else + { + /* build hash */ + int found = 0; + *xhlog &= ~SnapshotHashNotBuilt; + memset(xiphash, 0, (mask+1) * sizeof(TransactionId)); + for (i = 0; i < xcnt; i++) + { + found |= TransactionIdEquals(xip[i], xid); + d = xip[i] >> *xhlog; + j = k = xip[i] & mask; + while (xiphash[j] != InvalidTransactionId) + { + j = (k + d) & mask; + d = d*5 + 1; + } + xiphash[j] = xip[i]; + } + return found != 0; + } +} + /* * XidInMVCCSnapshot * Is the given XID still-in-progress according to the snapshot? @@ -1463,8 +1516,6 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin) static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { - uint32 i; - /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -1496,13 +1547,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - int32 j; - - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (XidInXip(xid, snapshot->subxip, snapshot->subxcnt, &snapshot->subxhlog)) + return true; /* not there, fall through to search xip[] */ } @@ -1523,16 +1569,12 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - for (i = 0; i < snapshot->xcnt; i++) - { - if (TransactionIdEquals(xid, snapshot->xip[i])) - return true; - } + + if (XidInXip(xid, snapshot->xip, snapshot->xcnt, &snapshot->xhlog)) + return true; } else { - int32 j; - /* * In recovery we store all xids in the subxact array because it is by * far the bigger array, and we mostly don't know which xids are @@ -1562,11 +1604,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (XidInXip(xid, snapshot->subxip, snapshot->subxcnt, &snapshot->subxhlog)) + return true; } return false; diff --git a/src/include/utils/snapshot.h b/src/include/utils/snapshot.h index 074cc81..9054f16 100644 --- a/src/include/utils/snapshot.h +++ b/src/include/utils/snapshot.h @@ -23,6 +23,15 @@ typedef struct SnapshotData *Snapshot; #define InvalidSnapshot ((Snapshot) NULL) +/* + * Big running list will be converted into hash table on demand. + * SnapshotMinHash is threshold between "linear scan" and "hashtable on demand". + * SnapshotHashNotBuilt indicates that hash table not built yet. + */ +#define SnapshotMinHash (60) +#define SnapshotHashNotBuilt ((uint8)0x80) +/* Calculate size to hold both array and hash table (if needed) */ +int ExtendXipSizeForHash(int xipcnt, uint8* plog); /* * We use SnapshotData structures to represent both "regular" (MVCC) @@ -76,6 +85,7 @@ typedef struct SnapshotData */ TransactionId *xip; uint32 xcnt; /* # of xact ids in xip[] */ + uint8 xhlog; /* log2 of allocated xip hash part */ /* * For non-historic MVCC snapshots, this contains subxact IDs that are in @@ -92,6 +102,7 @@ typedef struct SnapshotData bool takenDuringRecovery; /* recovery-shaped snapshot? */ bool copied; /* false if it's a static snapshot */ + uint8 subxhlog; /* log2 of allocated subxip hash part */ CommandId curcid; /* in my xact, CID < curcid are visible */ -- 1.8.3.1
test_hashsnap_9.tar.gz
Description: GNU Zip compressed data
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers