Hi there, I created two patches improving the log messages generated by log_lock_waits. The first patch shows the process IDs holding a lock we try to acquire (show_pids_in_lock_log.patch), sample output (log_lock_waits=on required):
session 1: BEGIN; LOCK TABLE foo IN SHARE MODE; session 2: BEGIN; LOCK TABLE foo IN SHARE MODE; session 3: BEGIN; LOCK TABLE foo IN EXCLUSIVE MODE; Output w/o patch: LOG: process 13777 still waiting for ExclusiveLock on relation 16385 of database 16384 after 1000.030 ms Output with patch: LOG: process 13777 still waiting for ExclusiveLock on relation 16385 of database 16384 after 1000.030 ms CONTEXT: processes owning lock: 13775, 13776 The second patch (show_table_name_and_tuple_in_lock_log.patch) includes relation info (table name and OID) as well as some tuple information (if available). Sample output (log_lock_waits=on required): session 1: CREATE TABLE foo (val integer); INSERT INTO foo (val) VALUES (1); BEGIN; UPDATE foo SET val = 3; session 2: BEGIN; UPDATE TABLE foo SET val = 2; Output w/o patch: LOG: process 24774 acquired ShareLock on transaction 696 after 11688.720 ms Output with patch: LOG: process 24774 acquired ShareLock on transaction 696 after 11688.720 ms CONTEXT: relation name: foo (OID 16385) tuple (ctid (0,1)): (1) Regarding this patch I am not really sure where to put the functions. Currently they are located in backend/storage/lmgr/lmgr.c because XactLockTableWait() is located there, too. What do you think? I also created two test specs for easy creation of the log output; however, I was not able to provide an expected file since the process IDs vary from test run to test run. Regards, -- Christian Kruse http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 122afb2..fdfacdf 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -1195,13 +1195,19 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) */ if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED) { - StringInfoData buf; + StringInfoData buf, + buf1; const char *modename; long secs; int usecs; long msecs; + SHM_QUEUE *procLocks; + PROCLOCK *proclock; + bool first = true; + int lockHoldersNum = 0; initStringInfo(&buf); + initStringInfo(&buf1); DescribeLockTag(&buf, &locallock->tag.lock); modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, lockmode); @@ -1211,10 +1217,42 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; + LWLockAcquire(partitionLock, LW_SHARED); + + procLocks = &(lock->procLocks); + proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, + offsetof(PROCLOCK, lockLink)); + + while (proclock) + { + if (MyProc->pid != proclock->tag.myProc->pid) + { + if (first) + { + appendStringInfo(&buf1, "%d", + proclock->tag.myProc->pid); + first = false; + } + else + appendStringInfo(&buf1, ", %d", + proclock->tag.myProc->pid); + + lockHoldersNum++; + } + + proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, + offsetof(PROCLOCK, lockLink)); + } + + LWLockRelease(partitionLock); + if (deadlock_state == DS_SOFT_DEADLOCK) ereport(LOG, (errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms", - MyProcPid, modename, buf.data, msecs, usecs))); + MyProcPid, modename, buf.data, msecs, usecs), + (errcontext(ngettext("process owning lock: %s", + "processes owning lock: %s", + lockHoldersNum), buf1.data)))); else if (deadlock_state == DS_HARD_DEADLOCK) { /* @@ -1226,13 +1264,19 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) */ ereport(LOG, (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms", - MyProcPid, modename, buf.data, msecs, usecs))); + MyProcPid, modename, buf.data, msecs, usecs), + (errcontext(ngettext("process owning lock: %s", + "processes owning lock: %s", + lockHoldersNum), buf1.data)))); } if (myWaitStatus == STATUS_WAITING) ereport(LOG, (errmsg("process %d still waiting for %s on %s after %ld.%03d ms", - MyProcPid, modename, buf.data, msecs, usecs))); + MyProcPid, modename, buf.data, msecs, usecs), + (errcontext(ngettext("process owning lock: %s", + "processes owning lock: %s", + lockHoldersNum), buf1.data)))); else if (myWaitStatus == STATUS_OK) ereport(LOG, (errmsg("process %d acquired %s on %s after %ld.%03d ms", @@ -1252,7 +1296,10 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) if (deadlock_state != DS_HARD_DEADLOCK) ereport(LOG, (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms", - MyProcPid, modename, buf.data, msecs, usecs))); + MyProcPid, modename, buf.data, msecs, usecs), + (errcontext(ngettext("process owning lock: %s", + "processes owning lock: %s", + lockHoldersNum), buf1.data)))); } /* @@ -1262,6 +1309,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) deadlock_state = DS_NO_DEADLOCK; pfree(buf.data); + pfree(buf1.data); } } while (myWaitStatus == STATUS_WAITING);
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index f8545c1..cfa49c2 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -2702,9 +2702,11 @@ l1: if (infomask & HEAP_XMAX_IS_MULTI) { + XactLockTableWaitSetupErrorContextCallback(relation, &tp); /* wait for multixact */ MultiXactIdWait((MultiXactId) xwait, MultiXactStatusUpdate, NULL, infomask); + XactLockTableWaitCleanupErrorContextCallback(); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -2730,7 +2732,10 @@ l1: else { /* wait for regular transaction to end */ + XactLockTableWaitSetupErrorContextCallback(relation, &tp); + XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -3254,9 +3259,11 @@ l2: TransactionId update_xact; int remain; + XactLockTableWaitSetupErrorContextCallback(relation, &oldtup); /* wait for multixact */ MultiXactIdWait((MultiXactId) xwait, mxact_status, &remain, infomask); + XactLockTableWaitCleanupErrorContextCallback(); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -3330,7 +3337,10 @@ l2: else { /* wait for regular transaction to end */ + XactLockTableWaitSetupErrorContextCallback(relation, &oldtup); + XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* @@ -4398,7 +4408,11 @@ l3: RelationGetRelationName(relation)))); } else + { + XactLockTableWaitSetupErrorContextCallback(relation, tuple); MultiXactIdWait((MultiXactId) xwait, status, NULL, infomask); + XactLockTableWaitCleanupErrorContextCallback(); + } /* if there are updates, follow the update chain */ if (follow_updates && @@ -4453,7 +4467,11 @@ l3: RelationGetRelationName(relation)))); } else + { + XactLockTableWaitSetupErrorContextCallback(relation, tuple); XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); + } /* if there are updates, follow the update chain */ if (follow_updates && @@ -5139,9 +5157,14 @@ l4: if (needwait) { + XactLockTableWaitSetupErrorContextCallback(rel, &mytup); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); XactLockTableWait(members[i].xid); pfree(members); + + XactLockTableWaitCleanupErrorContextCallback(); + goto l4; } if (res != HeapTupleMayBeUpdated) @@ -5199,8 +5222,13 @@ l4: &needwait); if (needwait) { + XactLockTableWaitSetupErrorContextCallback(rel, &mytup); + LockBuffer(buf, BUFFER_LOCK_UNLOCK); XactLockTableWait(rawxmax); + + XactLockTableWaitCleanupErrorContextCallback(); + goto l4; } if (res != HeapTupleMayBeUpdated) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index a452fea..7f79615 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -165,7 +165,11 @@ top: { /* Have to wait for the other guy ... */ _bt_relbuf(rel, buf); + + XactLockTableWaitSetupErrorContextCallback(rel, NULL); XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); + /* start over... */ _bt_freestack(stack); goto top; diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 7ad9720..6e80e37 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2297,7 +2297,11 @@ IndexBuildHeapScan(Relation heapRelation, * Must drop the lock on the buffer before we wait */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + XactLockTableWaitSetupErrorContextCallback(heapRelation, heapTuple); XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); + goto recheck; } } @@ -2343,7 +2347,11 @@ IndexBuildHeapScan(Relation heapRelation, * Must drop the lock on the buffer before we wait */ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + XactLockTableWaitSetupErrorContextCallback(heapRelation, heapTuple); XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); + goto recheck; } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 6be17a9..2ee6f51 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1981,7 +1981,11 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, if (TransactionIdIsValid(SnapshotDirty.xmax)) { ReleaseBuffer(buffer); + + XactLockTableWaitSetupErrorContextCallback(relation, &tuple); XactLockTableWait(SnapshotDirty.xmax); + XactLockTableWaitCleanupErrorContextCallback(); + continue; /* loop back to repeat heap_fetch */ } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 39e3b2e..65aca6e 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1307,7 +1307,11 @@ retry: if (TransactionIdIsValid(xwait)) { index_endscan(index_scan); + + XactLockTableWaitSetupErrorContextCallback(heap, tup); XactLockTableWait(xwait); + XactLockTableWaitCleanupErrorContextCallback(); + goto retry; } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index a978172..74eddab 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -18,12 +18,14 @@ #include "access/subtrans.h" #include "access/transam.h" #include "access/xact.h" +#include "access/htup_details.h" #include "catalog/catalog.h" #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/procarray.h" #include "utils/inval.h" - +#include "utils/lsyscache.h" +#include "mb/pg_wchar.h" /* * RelationInitLockInfo @@ -533,6 +535,142 @@ ConditionalXactLockTableWait(TransactionId xid) return true; } +static struct { + Relation rel; + HeapTuple tuple; +} XactLockTableWaitLockInfos; +ErrorContextCallback XactLockTableWaitErrorContext; + +/* + * XactLockTableWaitErrorContextCallback + * error context callback set up by + * XactLockTableWaitSetupErrorContextCallback. It reports some + * tuple information and the relation of a lock to aquire + * + */ +static void +XactLockTableWaitErrorContextCallback(void *arg) +{ + StringInfoData buf; + int i; + bool write_comma = false; + int maxfieldlen = 30; + + if (XactLockTableWaitLockInfos.rel != NULL) + { + errcontext("relation name: %s (OID %u)", + RelationGetRelationName(XactLockTableWaitLockInfos.rel), + RelationGetRelid(XactLockTableWaitLockInfos.rel)); + } + + if (XactLockTableWaitLockInfos.tuple != NULL) + { + /* + * Can't produce an error message including the tuple if we're in a + * possibly aborted transaction state, database access might not be safe. + */ + if (geterrlevel() >= ERROR) + { + errcontext("tuple ctid (%u,%u)", + BlockIdGetBlockNumber(&(XactLockTableWaitLockInfos.tuple->t_self.ip_blkid)), + XactLockTableWaitLockInfos.tuple->t_self.ip_posid); + } + else + { + TupleDesc desc = RelationGetDescr(XactLockTableWaitLockInfos.rel); + Datum *values = palloc(desc->natts * sizeof(*values)); + bool *nulls = palloc(desc->natts * sizeof(*values)); + + heap_deform_tuple(XactLockTableWaitLockInfos.tuple, desc, values, nulls); + + initStringInfo(&buf); + appendStringInfoChar(&buf, '('); + + for (i = 0; i < desc->natts; i++) + { + char *val; + int vallen; + + if (nulls[i]) + val = "null"; + else + { + Oid foutoid; + bool typisvarlena; + + getTypeOutputInfo(desc->attrs[i]->atttypid, + &foutoid, &typisvarlena); + + val = OidOutputFunctionCall(foutoid, values[i]); + } + + if (write_comma) + appendStringInfoString(&buf, ", "); + else + write_comma = true; + + vallen = strlen(val); + if (vallen <= maxfieldlen) + appendStringInfoString(&buf, val); + else + { + vallen = pg_mbcliplen(val, vallen, maxfieldlen); + appendBinaryStringInfo(&buf, val, vallen); + appendStringInfoString(&buf, "..."); + } + } + + appendStringInfoChar(&buf, ')'); + + errcontext("tuple (ctid (%u,%u)): %s", + BlockIdGetBlockNumber(&(XactLockTableWaitLockInfos.tuple->t_self.ip_blkid)), + XactLockTableWaitLockInfos.tuple->t_self.ip_posid, buf.data); + + pfree(buf.data); + pfree(values); + pfree(nulls); + } + } +} + +/* + * XactLockTableWaitSetupErrorContextCallback + * Sets up the error context callback for reporting tuple + * information and relation for a lock to aquire + * + * Use this before calling XactTableLockWait() or MultiXactIdWait() + */ +void +XactLockTableWaitSetupErrorContextCallback(Relation rel, HeapTuple tuple) +{ + /* + * assure that we haven't already set up the same callback + */ + Assert(XactLockTableWaitLockInfos.rel == NULL); + + XactLockTableWaitLockInfos.rel = rel; + XactLockTableWaitLockInfos.tuple = tuple; + + XactLockTableWaitErrorContext.callback = XactLockTableWaitErrorContextCallback; + XactLockTableWaitErrorContext.previous = error_context_stack; + error_context_stack = &XactLockTableWaitErrorContext; +} + +/* + * XactLockTableWaitCleanupErrorContextCallback + * Removes the error callback context + * + * Use this after calling XactTableLockWait() or MultiXactIdWait() + */ +void +XactLockTableWaitCleanupErrorContextCallback(void) +{ + error_context_stack = error_context_stack->previous; + + XactLockTableWaitLockInfos.rel = NULL; + XactLockTableWaitLockInfos.tuple = NULL; +} + /* * WaitForLockersMultiple * Wait until no transaction holds locks that conflict with the given diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 65eb3bd..16e0410 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -1204,6 +1204,23 @@ geterrposition(void) } /* + * geterrlevel --- return the currently set error level + * + * This is only intended for use in error callback subroutines, since there + * is no other place outside elog.c where the concept is meaningful. + */ +int +geterrlevel(void) +{ + ErrorData *edata = &errordata[errordata_stack_depth]; + + /* we don't bother incrementing recursion_depth */ + CHECK_STACK_DEPTH(); + + return edata->elevel; +} + +/* * getinternalerrposition --- same for internal error position * * This is only intended for use in error callback subroutines, since there diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 1a8c018..ff97888 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -18,6 +18,7 @@ #include "storage/itemptr.h" #include "storage/lock.h" #include "utils/rel.h" +#include "access/htup.h" extern void RelationInitLockInfo(Relation relation); @@ -57,6 +58,9 @@ extern void XactLockTableDelete(TransactionId xid); extern void XactLockTableWait(TransactionId xid); extern bool ConditionalXactLockTableWait(TransactionId xid); +extern void XactLockTableWaitSetupErrorContextCallback(Relation rel, HeapTuple tuple); +extern void XactLockTableWaitCleanupErrorContextCallback(void); + /* Lock VXIDs, specified by conflicting locktags */ extern void WaitForLockers(LOCKTAG heaplocktag, LOCKMODE lockmode); extern void WaitForLockersMultiple(List *locktags, LOCKMODE lockmode); diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 59d6f5e..3b627c4 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -224,6 +224,7 @@ extern int err_generic_string(int field, const char *str); extern int geterrcode(void); extern int geterrposition(void); +extern int geterrlevel(void); extern int getinternalerrposition(void);
setup { DROP TABLE IF EXISTS foo; CREATE TABLE foo ( key int PRIMARY KEY, value int ); } teardown { DROP TABLE foo; } session "s1" setup { SET log_lock_waits = on; SET client_min_messages = LOG; } step "s1-lock" { BEGIN; LOCK TABLE foo IN SHARE MODE; } step "s1-commit" { SELECT pg_sleep(1.5); COMMIT; } session "s2" setup { SET log_lock_waits = on; SET client_min_messages = LOG; } step "s2-lock" { BEGIN; LOCK TABLE foo IN SHARE MODE; } step "s2-commit" { SELECT pg_sleep(1.5); COMMIT; } session "s3" setup { SET log_lock_waits = on; SET client_min_messages = LOG; SET statement_timeout = 1100; } step "s3-lock" { BEGIN; LOCK TABLE foo IN EXCLUSIVE MODE; } step "s3-commit" { ROLLBACK; } permutation "s1-lock" "s2-lock" "s3-lock" "s1-commit" "s2-commit" "s3-commit"
setup { DROP TABLE IF EXISTS foo; CREATE TABLE foo ( key int PRIMARY KEY, value int ); INSERT INTO foo VALUES (1, 1); } teardown { DROP TABLE foo; } session "s1" setup { SET log_lock_waits = on; SET client_min_messages = LOG; } step "s1-update" { BEGIN; UPDATE foo SET value = 2; } step "s1-commit" { SELECT pg_sleep(1.5); COMMIT; } session "s2" setup { SET log_lock_waits = on; SET client_min_messages = LOG; BEGIN; } step "s2-update" { UPDATE foo SET value = 3; } teardown { ROLLBACK; } permutation "s1-update" "s2-update" "s1-commit"
pgp96tAQvTK_x.pgp
Description: PGP signature