0 is already

daveg Tue, 23 Aug 2011 00:25:29 -0700

On Sun, Aug 14, 2011 at 12:16:39AM -0400, Robert Haas wrote:
> On Fri, Aug 12, 2011 at 7:19 PM, daveg <da...@sonic.net> wrote:
> > This seems to be bug month for my client. Now there are seeing periods
> > where all new connections fail immediately with the error:
> >
> >   FATAL:  lock AccessShareLock on object 0/1260/0 is already held
...
> > What can I do to help track this down?
> 
> I've seen that error (though not that exact fact pattern) caused by
> bad RAM.  It's unclear to me what else could cause it.
> 
> In terms of debugging, it seems like it might be sensible to start by
> injecting some debugging code that dumps out the contents of the LOCK
> and LOCALLOCK structures at the point the error occurs.


I've made up the attached patch to print this, please suggest any additions.
I'll deploy this on a couple of the production hosts that have had the
issue this evening, but there is no telling when or if it will strike next.

-dg

-- 
David Gould       da...@sonic.net      510 536 1443    510 282 0869
If simplicity worked, the world would be overrun with insects.

*** postgresql-9.0.4/src/backend/storage/lmgr/lock.c.orig       2011-08-22 
13:46:29.386428943 -0700
--- postgresql-9.0.4/src/backend/storage/lmgr/lock.c    2011-08-23 
00:12:43.456422966 -0700
***************
*** 469,474 ****
--- 469,534 ----
        return LockAcquireExtended(locktag, lockmode, sessionLock, dontWait, 
true);
  }
  
+ int DG_FORCE_TRAP = 0;
+ 
+ inline static void
+ DG_LOCK_PRINT(const char *where, const LOCK *lock, LOCKMODE type)
+ {
+       elog(LOG,
+                "%s: lock(%p) id(%u,%u,%u,%u,%u,%u) grantMask(%x) "
+                "req(%d,%d,%d,%d,%d,%d,%d)=%d "
+                "grant(%d,%d,%d,%d,%d,%d,%d)=%d wait(%d) type(%s)",
+                where, lock,
+                lock->tag.locktag_field1, lock->tag.locktag_field2,
+                lock->tag.locktag_field3, lock->tag.locktag_field4,
+                lock->tag.locktag_type, lock->tag.locktag_lockmethodid,
+                lock->grantMask,
+                lock->requested[1], lock->requested[2], lock->requested[3],
+                lock->requested[4], lock->requested[5], lock->requested[6],
+                lock->requested[7], lock->nRequested,
+                lock->granted[1], lock->granted[2], lock->granted[3],
+                lock->granted[4], lock->granted[5], lock->granted[6],
+                lock->granted[7], lock->nGranted,
+                lock->waitProcs.size,
+                LockMethods[LOCK_LOCKMETHOD(*lock)]->lockModeNames[type]);
+ }
+ 
+ 
+ inline static void
+ DG_PROCLOCK_PRINT(const char *where, const PROCLOCK *proclockP)
+ {
+       elog(LOG,
+                "%s: proclock(%p) lock(%p) method(%u) proc(%p) "
+          "hold(%x) release(%x) "
+          "links(p,n): lock=(%p,%p), proc=(%p,%p)",
+                where, proclockP, proclockP->tag.myLock,
+                PROCLOCK_LOCKMETHOD(*(proclockP)),
+                proclockP->tag.myProc,
+                (int) proclockP->holdMask, (int) proclockP->releaseMask,
+                proclockP->lockLink.prev, proclockP->lockLink.next,
+                proclockP->procLink.prev, proclockP->procLink.next
+               );
+ }
+ 
+ inline static void
+ DG_LOCALLOCK_PRINT(const char *where, const LOCALLOCK *localP)
+ {
+       elog(LOG,
+                "%s: locallock(%p) id(%u,%u,%u,%u,%u,%u mode %x) "
+                "lock(%p), proclock(%p) "
+                "hashcode %x  nlocks %ld  numLockOwners %d  maxLockOwners %d ",
+                where, localP,
+                localP->tag.lock.locktag_field1, 
localP->tag.lock.locktag_field2,
+                localP->tag.lock.locktag_field3, 
localP->tag.lock.locktag_field4,
+                localP->tag.lock.locktag_type, 
localP->tag.lock.locktag_lockmethodid,
+                localP->tag.mode,
+                localP->lock, localP->proclock,
+                localP->hashcode, localP->nLocks,
+                localP->numLockOwners, localP->maxLockOwners
+                /* localP->lockOwners[0].owner, localP->lockOwners.nlocks  "%p 
%d" */
+               );
+ }
+ 
  /*
   * LockAcquireExtended - allows us to specify additional options
   *
***************
*** 500,505 ****
--- 560,568 ----
        LWLockId        partitionLock;
        int                     status;
        bool            log_lock = false;
+       int                     DG_found_local = -1;
+       int                     DG_found_lock = -1;
+       int                     DG_found_proc = -1;
  
        if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
                elog(ERROR, "unrecognized lock method: %d", lockmethodid);
***************
*** 540,546 ****
        locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
                                                                                
  (void *) &localtag,
                                                                                
  HASH_ENTER, &found);
! 
        /*
         * if it's a new locallock object, initialize it
         */
--- 603,609 ----
        locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash,
                                                                                
  (void *) &localtag,
                                                                                
  HASH_ENTER, &found);
!       DG_found_local = found ? 1 : 0;
        /*
         * if it's a new locallock object, initialize it
         */
***************
*** 620,625 ****
--- 683,689 ----
                                                                                
                hashcode,
                                                                                
                HASH_ENTER_NULL,
                                                                                
                &found);
+       DG_found_lock = found ? 1 : 0;
        if (!lock)
        {
                LWLockRelease(partitionLock);
***************
*** 672,677 ****
--- 736,742 ----
                                                                                
                                proclock_hashcode,
                                                                                
                                HASH_ENTER_NULL,
                                                                                
                                &found);
+       DG_found_proc = found ? 1 : 0;
        if (!proclock)
        {
                /* Ooops, not enough shmem for the proclock */
***************
*** 771,782 ****
         * We shouldn't already hold the desired lock; else locallock table is
         * broken.
         */
!       if (proclock->holdMask & LOCKBIT_ON(lockmode))
                elog(ERROR, "lock %s on object %u/%u/%u is already held",
                         lockMethodTable->lockModeNames[lockmode],
                         lock->tag.locktag_field1, lock->tag.locktag_field2,
                         lock->tag.locktag_field3);
! 
        /*
         * If lock requested conflicts with locks requested by waiters, must 
join
         * wait queue.  Otherwise, check for conflict with already-held locks.
--- 836,858 ----
         * We shouldn't already hold the desired lock; else locallock table is
         * broken.
         */
!       if (DG_FORCE_TRAP || proclock->holdMask & LOCKBIT_ON(lockmode))
!       {
!               DG_FORCE_TRAP = 0;
!               elog(LOG,
!                        "LockAcquire: already held: holdMask %x LOCKBIT %x "
!                        "found_local %d found_lock %d found_proc %d ",
!                        proclock->holdMask, LOCKBIT_ON(lockmode),
!                        DG_found_local, DG_found_lock, DG_found_proc);
!               DG_LOCALLOCK_PRINT("LockAcquire LOCALLOCK", locallock);
!               DG_LOCK_PRINT("LockAcquire LOCK", lock, lockmode);
!               DG_PROCLOCK_PRINT("LockAcquire PROCLOCK", proclock);
! 
                elog(ERROR, "lock %s on object %u/%u/%u is already held",
                         lockMethodTable->lockModeNames[lockmode],
                         lock->tag.locktag_field1, lock->tag.locktag_field2,
                         lock->tag.locktag_field3);
!       }
        /*
         * If lock requested conflicts with locks requested by waiters, must 
join
         * wait queue.  Otherwise, check for conflict with already-held locks.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] OperationalError: FATAL: lock AccessShareLock on object 0/1260/0 is already

Reply via email to