Simon Riggs wrote:
> On Tue, 2010-04-13 at 21:09 +0300, Heikki Linnakangas wrote:
>> A quick fix would be to check if there's any entries in the hash table
>> before scanning it. That would eliminate the overhead when there's no
>> in-progress transactions in the master. But as soon as there's even one,
>> the overhead comes back.
>
> Any fix should be fairly quick because of the way its modularised - with
> something like this in mind.
>
> I'll try a circular buffer implementation, with fastpath.
I started experimenting with a sorted array based implementation on
Tuesday but got carried away with other stuff. I now got back to that
and cleaned it up.
How does the attached patch look like? It's probably similar to what you
had in mind.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 88169b4..4a04051 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -64,8 +64,10 @@ typedef struct ProcArrayStruct
int numProcs; /* number of valid procs entries */
int maxProcs; /* allocated size of procs array */
- int numKnownAssignedXids; /* current number of known assigned
- * xids */
+ int firstKnownAssigned; /* location of first valid known
+ * assigned xid in the array */
+ int lastKnownAssigned; /* location of last valid known
+ * assigned xid in the array */
int maxKnownAssignedXids; /* allocated size of known assigned
* xids */
@@ -87,7 +89,8 @@ static ProcArrayStruct *procArray;
/*
* Bookkeeping for tracking emulated transactions in recovery
*/
-static HTAB *KnownAssignedXidsHash;
+static TransactionId *KnownAssignedXidsArray;
+static bool *KnownAssignedXidsValidArray;
static TransactionId latestObservedXid = InvalidTransactionId;
/*
@@ -201,28 +204,33 @@ CreateSharedProcArray(void)
/* Normal processing */
procArray->numProcs = 0;
procArray->maxProcs = PROCARRAY_MAXPROCS;
- procArray->numKnownAssignedXids = 0;
procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
+ procArray->firstKnownAssignedXid = 0;
+ procArray->lastKnownAssignedXid = 0;
procArray->lastOverflowedXid = InvalidTransactionId;
}
if (XLogRequestRecoveryConnections)
{
- /* Create or attach to the KnownAssignedXids hash table */
- HASHCTL info;
-
- MemSet(&info, 0, sizeof(info));
- info.keysize = sizeof(TransactionId);
- info.entrysize = sizeof(TransactionId);
- info.hash = tag_hash;
-
- KnownAssignedXidsHash = ShmemInitHash("KnownAssignedXids Hash",
- TOTAL_MAX_CACHED_SUBXIDS,
- TOTAL_MAX_CACHED_SUBXIDS,
- &info,
- HASH_ELEM | HASH_FUNCTION);
- if (!KnownAssignedXidsHash)
- elog(FATAL, "could not initialize known assigned xids hash table");
+ Size size;
+ /* Create or attach to the KnownAssignedXids arrays */
+ size = mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS);
+ KnownAssignedXidsArray = ShmemInitStruct("KnownAssignedXidsArray",
+ size,
+ &found);
+ if (!KnownAssignedXidsArray)
+ elog(FATAL, "could not initialize known assigned xids array");
+ if (!found)
+ MemSet(KnownAssignedXidsArray, 0, size);
+
+ size = mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS);
+ KnownAssignedXidsValidArray = ShmemInitStruct("KnownAssignedXidsValidArray",
+ size,
+ &found);
+ if (!KnownAssignedXidsValidArray)
+ elog(FATAL, "could not initialize known assigned xids used array");
+ if (!found)
+ MemSet(KnownAssignedXidsValidArray, 0, size);
}
}
@@ -2162,7 +2170,7 @@ DisplayXidCache(void)
*
* During recovery we do not fret too much about the distinction between
* top-level xids and subtransaction xids. We hold both together in
- * a hash table called KnownAssignedXids. In backends, this is copied into
+ * an array called KnownAssignedXids. In backends, this is copied into
* snapshots in GetSnapshotData(), taking advantage
* of the fact that XidInMVCCSnapshot() doesn't care about the distinction
* either. Subtransaction xids are effectively treated as top-level xids
@@ -2207,7 +2215,7 @@ RecordKnownAssignedTransactionIds(TransactionId xid)
* We can see WAL records before the running-xacts snapshot that contain
* XIDs that are not in the running-xacts snapshot, but that we know to
* have finished before the running-xacts snapshot was taken. Don't waste
- * precious shared memory by keeping them in the hash table.
+ * precious shared memory by keeping them in the array.
*
* We can also see WAL records before the running-xacts snapshot that
* contain XIDs that are not in the running-xacts snapshot for a different
@@ -2330,24 +2338,30 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
/*
* Private module functions to manipulate KnownAssignedXids
*
- * There are 3 main users of the KnownAssignedXids data structure:
+ * We use a fixed-size sorted array in shared memory to keep track of XIDs
+ * that we consider as in-progress in the master at this time. This data
+ * structure is called KnownAssignedXids, and it has 4 main users:
*
- * * backends taking snapshots
+ * * backends taking snapshots, copying all XIDs in the array
+ * * backends checking if an XID exists in the array
* * startup process adding new knownassigned xids
* * startup process removing xids as transactions end
*
- * If we make KnownAssignedXids a simple sorted array then the first two
- * operations are fast, but the last one is at least O(N). If we make
- * KnownAssignedXids a hash table then the last two operations are fast,
- * though we have to do more work at snapshot time. Doing more work at
- * commit could slow down taking snapshots anyway because of lwlock
- * contention. Scanning the hash table is O(N) on the max size of the array,
- * so performs poorly in comparison when we have very low numbers of
- * write transactions to process. But at least it is constant overhead
- * and a sequential memory scan will utilise hardware memory readahead
- * to give much improved performance. In any case the emphasis must be on
- * having the standby process changes quickly so that it can provide
- * high availability. So we choose to implement as a hash table.
+ * Typically, there is only a few entries in the array, compared to the
+ * maximum size, so we keep track of the first and last valid entry in the
+ * array to make scanning it quick. That also makes it quick to add entries
+ * to the end. XIDs are assigned in monotonically increasing order, so new
+ * entries always go to the end.
+ *
+ * When an entry is removed, it's merely marked as invalid, but left in
+ * place in the array. There is a second array of booleans,
+ * KnownAssignedXidsValidArray, which keeps track of which entries in the
+ * KnownAssignedXidsArray are valid.
+ *
+ * Because we leave the entry in place when an XID is marked as removed, the
+ * array will eventually fill up. When an entry needs to be added and there
+ * is no room for it, the array is compacted by copying all valid entries to
+ * the beginning of the array, removing all invalid entries.
*/
/*
@@ -2358,41 +2372,49 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
static void
KnownAssignedXidsAdd(TransactionId *xids, int nxids)
{
- TransactionId *result;
- bool found;
int i;
for (i = 0; i < nxids; i++)
{
- Assert(TransactionIdIsValid(xids[i]));
+ TransactionId xid = xids[i];
- elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xids[i]);
+ Assert(TransactionIdIsValid(xid));
- procArray->numKnownAssignedXids++;
- if (procArray->numKnownAssignedXids > procArray->maxKnownAssignedXids)
- {
- KnownAssignedXidsDisplay(LOG);
- LWLockRelease(ProcArrayLock);
- elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
- }
+ elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xid);
- result = (TransactionId *) hash_search(KnownAssignedXidsHash, &xids[i], HASH_ENTER,
- &found);
+ Assert(procArray->lastKnownAssignedXid == 0 ||
+ TransactionIdFollows(xid, KnownAssignedXidsArray[procArray->lastKnownAssignedXid - 1]));
- if (!result)
+ /* Compact if necessary */
+ if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
{
- LWLockRelease(ProcArrayLock);
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of shared memory")));
- }
+ int j;
+ int k;
- if (found)
- {
- KnownAssignedXidsDisplay(LOG);
- LWLockRelease(ProcArrayLock);
- elog(ERROR, "found duplicate KnownAssignedXid %u", xids[i]);
+ k = 0;
+ for (j = procArray->firstKnownAssignedXid; j < procArray->lastKnownAssignedXid; j++)
+ {
+ if (KnownAssignedXidsValidArray[j])
+ {
+ KnownAssignedXidsArray[k] = KnownAssignedXidsArray[j];
+ KnownAssignedXidsValidArray[k] = true;
+ k++;
+ }
+ }
+ procArray->firstKnownAssignedXid = 0;
+ procArray->lastKnownAssignedXid = k;
+
+ if (procArray->lastKnownAssignedXid == procArray->maxKnownAssignedXids)
+ {
+ KnownAssignedXidsDisplay(LOG);
+ LWLockRelease(ProcArrayLock);
+ elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
+ }
}
+
+ KnownAssignedXidsArray[procArray->lastKnownAssignedXid] = xid;
+ KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid] = true;
+ procArray->lastKnownAssignedXid++;
}
}
@@ -2404,10 +2426,21 @@ KnownAssignedXidsAdd(TransactionId *xids, int nxids)
static bool
KnownAssignedXidsExist(TransactionId xid)
{
- bool found;
+ int low = procArray->firstKnownAssignedXid;
+ int high = procArray->lastKnownAssignedXid - 1;
- (void) hash_search(KnownAssignedXidsHash, &xid, HASH_FIND, &found);
- return found;
+ while (low <= high)
+ {
+ int middle = low + (high - low) / 2;
+
+ if (xid == KnownAssignedXidsArray[middle])
+ return true;
+ else if (xid > KnownAssignedXidsArray[middle])
+ low = middle + 1;
+ else
+ high = middle - 1;
+ }
+ return false;
}
/*
@@ -2418,17 +2451,37 @@ KnownAssignedXidsExist(TransactionId xid)
static void
KnownAssignedXidsRemove(TransactionId xid)
{
- bool found;
+ int low = procArray->firstKnownAssignedXid;
+ int high = procArray->lastKnownAssignedXid - 1;
Assert(TransactionIdIsValid(xid));
elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
- (void) hash_search(KnownAssignedXidsHash, &xid, HASH_REMOVE, &found);
-
- if (found)
- procArray->numKnownAssignedXids--;
- Assert(procArray->numKnownAssignedXids >= 0);
+ while (low <= high)
+ {
+ int middle = low + (high - low) / 2;
+
+ if (xid == KnownAssignedXidsArray[middle])
+ {
+ KnownAssignedXidsValidArray[middle] = false;
+ if (middle == procArray->lastKnownAssignedXid - 1)
+ {
+ while (procArray->lastKnownAssignedXid >= 0 && !KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid - 1])
+ procArray->lastKnownAssignedXid--;
+ }
+ if (middle == procArray->firstKnownAssignedXid)
+ {
+ while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid && !KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+ procArray->firstKnownAssignedXid++;
+ }
+ return;
+ }
+ else if (xid > KnownAssignedXidsArray[middle])
+ low = middle + 1;
+ else
+ high = middle - 1;
+ }
/*
* We can fail to find an xid if the xid came from a subtransaction that
@@ -2466,26 +2519,30 @@ static int
KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
TransactionId xmax)
{
- HASH_SEQ_STATUS status;
- TransactionId *knownXid;
int count = 0;
+ int i;
- hash_seq_init(&status, KnownAssignedXidsHash);
- while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+ for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
{
+ TransactionId xid = KnownAssignedXidsArray[i];
+
+ if (!KnownAssignedXidsValidArray[i])
+ continue;
+
/*
- * Filter out anything higher than xmax
+ * Filter out anything higher than xmax. The array is sorted so
+ * we can stop as soon as we find one that's too big
*/
- if (TransactionIdPrecedes(xmax, *knownXid))
- continue;
+ if (TransactionIdPrecedes(xmax, xid))
+ break;
- *xarray = *knownXid;
+ *xarray = xid;
xarray++;
count++;
/* update xmin if required */
- if (TransactionIdPrecedes(*knownXid, *xmin))
- *xmin = *knownXid;
+ if (TransactionIdPrecedes(xid, *xmin))
+ *xmin = xid;
}
return count;
@@ -2500,34 +2557,34 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
static void
KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
{
- TransactionId *knownXid;
- HASH_SEQ_STATUS status;
+ int i;
- if (TransactionIdIsValid(xid))
- elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
- else
+ if (!TransactionIdIsValid(xid))
+ {
elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
+ procArray->firstKnownAssignedXid = procArray->lastKnownAssignedXid = 0;
+ return;
+ }
+ elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
- hash_seq_init(&status, KnownAssignedXidsHash);
- while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
+ for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
{
- TransactionId removeXid = *knownXid;
- bool found;
+ TransactionId removeXid = KnownAssignedXidsArray[i];
+ if (!KnownAssignedXidsValidArray[i])
+ continue;
if (!TransactionIdIsValid(xid) || TransactionIdPrecedes(removeXid, xid))
{
if (keepPreparedXacts && StandbyTransactionIdIsPrepared(removeXid))
continue;
- else
- {
- (void) hash_search(KnownAssignedXidsHash, &removeXid,
- HASH_REMOVE, &found);
- if (found)
- procArray->numKnownAssignedXids--;
- Assert(procArray->numKnownAssignedXids >= 0);
- }
+ KnownAssignedXidsValidArray[i] = false;
}
}
+ while (procArray->lastKnownAssignedXid >= 0 && !KnownAssignedXidsValidArray[procArray->lastKnownAssignedXid - 1])
+ procArray->lastKnownAssignedXid--;
+
+ while (procArray->firstKnownAssignedXid < procArray->lastKnownAssignedXid && !KnownAssignedXidsValidArray[procArray->firstKnownAssignedXid])
+ procArray->firstKnownAssignedXid++;
}
/*
@@ -2538,26 +2595,21 @@ KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
static void
KnownAssignedXidsDisplay(int trace_level)
{
- HASH_SEQ_STATUS status;
- TransactionId *knownXid;
StringInfoData buf;
- TransactionId *xids;
int nxids;
int i;
- xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS);
- nxids = 0;
-
- hash_seq_init(&status, KnownAssignedXidsHash);
- while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
- xids[nxids++] = *knownXid;
-
- qsort(xids, nxids, sizeof(TransactionId), xidComparator);
-
initStringInfo(&buf);
- for (i = 0; i < nxids; i++)
- appendStringInfo(&buf, "%u ", xids[i]);
+ nxids = 0;
+ for (i = procArray->firstKnownAssignedXid; i < procArray->lastKnownAssignedXid; i++)
+ {
+ if (KnownAssignedXidsValidArray[i])
+ {
+ appendStringInfo(&buf, "%u ", KnownAssignedXidsArray[i]);
+ nxids++;
+ }
+ }
elog(trace_level, "%d KnownAssignedXids %s", nxids, buf.data);
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index c5c1228..eee0d58 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -203,7 +203,7 @@
* Enable debugging print statements for WAL-related operations; see
* also the wal_debug GUC var.
*/
-/* #define WAL_DEBUG */
+#define WAL_DEBUG
/*
* Enable tracing of resource consumption during sort operations;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers