On Sun, Aug 14, 2011 at 12:16:39AM -0400, Robert Haas wrote:
> On Fri, Aug 12, 2011 at 7:19 PM, daveg wrote:
> > This seems to be bug month for my client. Now there are seeing periods
> > where all new connections fail immediately with the error:
> >
> > FATAL: lock AccessShareLock on object 0/1260/0 is already held
...
> > What can I do to help track this down?
>
> I've seen that error (though not that exact fact pattern) caused by
> bad RAM. It's unclear to me what else could cause it.
>
> In terms of debugging, it seems like it might be sensible to start by
> injecting some debugging code that dumps out the contents of the LOCK
> and LOCALLOCK structures at the point the error occurs.
I've made up the attached patch to print this, please suggest any additions.
I'll deploy this on a couple of the production hosts that have had the
issue this evening, but there is no telling when or if it will strike next.
-dg
--
David Gould da...@sonic.net 510 536 1443510 282 0869
If simplicity worked, the world would be overrun with insects.
*** postgresql-9.0.4/src/backend/storage/lmgr/lock.c.orig 2011-08-22
13:46:29.386428943 -0700
--- postgresql-9.0.4/src/backend/storage/lmgr/lock.c2011-08-23
00:12:43.456422966 -0700
***
*** 469,474
--- 469,534
return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait,
true);
}
+ int DG_FORCE_TRAP = 0;
+
+ inline static void
+ DG_LOCK_PRINT(const char *where, const LOCK *lock, LOCKMODE type)
+ {
+ elog(LOG,
+"%s: lock(%p) id(%u,%u,%u,%u,%u,%u) grantMask(%x) "
+"req(%d,%d,%d,%d,%d,%d,%d)=%d "
+"grant(%d,%d,%d,%d,%d,%d,%d)=%d wait(%d) type(%s)",
+where, lock,
+lock->tag.locktag_field1, lock->tag.locktag_field2,
+lock->tag.locktag_field3, lock->tag.locktag_field4,
+lock->tag.locktag_type, lock->tag.locktag_lockmethodid,
+lock->grantMask,
+lock->requested[1], lock->requested[2], lock->requested[3],
+lock->requested[4], lock->requested[5], lock->requested[6],
+lock->requested[7], lock->nRequested,
+lock->granted[1], lock->granted[2], lock->granted[3],
+lock->granted[4], lock->granted[5], lock->granted[6],
+lock->granted[7], lock->nGranted,
+lock->waitProcs.size,
+LockMethods[LOCK_LOCKMETHOD(*lock)]->lockModeNames[type]);
+ }
+
+
+ inline static void
+ DG_PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
+ {
+ elog(LOG,
+"%s: proclock(%p) lock(%p) method(%u) proc(%p) "
+ "hold(%x) release(%x) "
+ "links(p,n): lock=(%p,%p), proc=(%p,%p)",
+where, proclockP, proclockP->tag.myLock,
+PROCLOCK_LOCKMETHOD(*(proclockP)),
+proclockP->tag.myProc,
+(int) proclockP->holdMask, (int) proclockP->releaseMask,
+proclockP->lockLink.prev, proclockP->lockLink.next,
+proclockP->procLink.prev, proclockP->procLink.next
+ );
+ }
+
+ inline static void
+ DG_LOCALLOCK_PRINT(const char *where, const LOCALLOCK *localP)
+ {
+ elog(LOG,
+"%s: locallock(%p) id(%u,%u,%u,%u,%u,%u mode %x) "
+"lock(%p), proclock(%p) "
+"hashcode %x nlocks %ld numLockOwners %d maxLockOwners %d ",
+where, localP,
+localP->tag.lock.locktag_field1,
localP->tag.lock.locktag_field2,
+localP->tag.lock.locktag_field3,
localP->tag.lock.locktag_field4,
+localP->tag.lock.locktag_type,
localP->tag.lock.locktag_lockmethodid,
+localP->tag.mode,
+localP->lock, localP->proclock,
+localP->hashcode, localP->nLocks,
+localP->numLockOwners, localP->maxLockOwners
+/* localP->lockOwners[0].owner, localP->lockOwners.nlocks "%p
%d" */
+ );
+ }
+
/*
* LockAcquireExtended - allows us to specify additional options
*
***
*** 500,505
--- 560,568
LWLockIdpartitionLock;
int status;
boollog_lock = false;
+ int DG_found_local = -1;
+ int DG_found_lock = -1;
+ int DG_found_proc = -1;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
elog(ERROR, "unrecognized lock method: %d", lockmethodid);
***
*** 540,546
locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
(void *) &localtag,
HASH_ENTER, &found);
!
/*
* if it's a new locallock objec