Marc Cousin írta:
> The Thursday 29 July 2010 13:55:38, Boszormenyi Zoltan wrote :
>
>> I fixed this by adding CheckLockTimeout() function that works like
>> CheckStatementTimeout() and ensuring that the same start time is
>> used for both deadlock_timeout and lock_timeout if both are active.
>> The preference of errors if their timeout values are equal is:
>> statement_timeout > lock_timeout > deadlock_timeout
>>
>
> As soon as lock_timeout is bigger than deadlock_timeout, it doesn't
> work, with this new version.
>
> Keeping the deadlock_timeout to 1s, when lock_timeout >= 1001,
> lock_timeout doesn't trigger anymore.
>
I missed one case when the lock_timeout_active should have been set
but the timer must not have been re-set, this caused the problem.
I blame the hot weather and having no air conditioning. The second is
now fixed. :-)
I also added one line in autovacuum.c to disable lock_timeout,
in case it's globally set in postgresql.conf as per Alvaro's comment.
Also, I made sure that only one or two timeout causes can be active at
a time: either one of deadlock_timeout and lock_timeout alone, or
statement_timeout plus one of the other two. Previously I was able to
trigger a segfault with the default 1 sec deadlock_timeout and
lock_timeout = 999 or 1001. Effectively, the system's clock resolution
made lock_timeout and deadlock_timeout equal, so RemoveFromWaitQueue()
was called twice. This way it's a lot more robust.
Best regards,
Zoltán Böszörményi
diff -dcrpN pgsql.orig/doc/src/sgml/config.sgml pgsql/doc/src/sgml/config.sgml
*** pgsql.orig/doc/src/sgml/config.sgml 2010-07-26 10:05:37.000000000 +0200
--- pgsql/doc/src/sgml/config.sgml 2010-07-29 11:58:56.000000000 +0200
*************** COPY postgres_log FROM '/full/path/to/lo
*** 4479,4484 ****
--- 4479,4508 ----
</listitem>
</varlistentry>
+ <varlistentry id="guc-lock-timeout" xreflabel="lock_timeout">
+ <term><varname>lock_timeout</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>lock_timeout</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Abort any statement that has to wait longer than the specified number
+ of milliseconds while trying to acquire a heavy-weight lock (e.g. on
+ rows, pages, tables, indices or other objects). The wait time is
+ measured from the moment the statement starts waiting for the lock.
+ If <varname>log_min_error_statement</> is set to <literal>ERROR</> or lower,
+ the statement that timed out will also be logged. A value of zero
+ (the default) turns off the limitation.
+ </para>
+
+ <para>
+ Setting <varname>lock_timeout</> in
+ <filename>postgresql.conf</> is not recommended because it
+ affects all sessions.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-vacuum-freeze-table-age" xreflabel="vacuum_freeze_table_age">
<term><varname>vacuum_freeze_table_age</varname> (<type>integer</type>)</term>
<indexterm>
diff -dcrpN pgsql.orig/doc/src/sgml/ref/lock.sgml pgsql/doc/src/sgml/ref/lock.sgml
*** pgsql.orig/doc/src/sgml/ref/lock.sgml 2010-04-03 09:23:01.000000000 +0200
--- pgsql/doc/src/sgml/ref/lock.sgml 2010-07-29 11:58:56.000000000 +0200
*************** LOCK [ TABLE ] [ ONLY ] <replaceable cla
*** 39,46 ****
<literal>NOWAIT</literal> is specified, <command>LOCK
TABLE</command> does not wait to acquire the desired lock: if it
cannot be acquired immediately, the command is aborted and an
! error is emitted. Once obtained, the lock is held for the
! remainder of the current transaction. (There is no <command>UNLOCK
TABLE</command> command; locks are always released at transaction
end.)
</para>
--- 39,49 ----
<literal>NOWAIT</literal> is specified, <command>LOCK
TABLE</command> does not wait to acquire the desired lock: if it
cannot be acquired immediately, the command is aborted and an
! error is emitted. If <varname>lock_timeout</varname> is set to a value
! higher than 0 and the lock cannot be acquired within the specified
! number of milliseconds, the command is aborted and an error
! is emitted. Once obtained, the lock is held for the remainder of
! the current transaction. (There is no <command>UNLOCK
TABLE</command> command; locks are always released at transaction
end.)
</para>
diff -dcrpN pgsql.orig/doc/src/sgml/ref/select.sgml pgsql/doc/src/sgml/ref/select.sgml
*** pgsql.orig/doc/src/sgml/ref/select.sgml 2010-06-20 13:59:13.000000000 +0200
--- pgsql/doc/src/sgml/ref/select.sgml 2010-07-29 11:58:56.000000000 +0200
*************** FOR SHARE [ OF <replaceable class="param
*** 1160,1165 ****
--- 1160,1173 ----
</para>
<para>
+ If the <literal>NOWAIT</> option is not specified and <varname>lock_timeout</varname>
+ is set to a value higher than 0, and the lock wait lasts longer than
+ the specified number of milliseconds, the command reports an error after
+ timing out, rather than waiting indefinitely. The note in the previous
+ paragraph applies to <varname>lock_timeout</varname>, too.
+ </para>
+
+ <para>
If specific tables are named in <literal>FOR UPDATE</literal>
or <literal>FOR SHARE</literal>,
then only rows coming from those tables are locked; any other
diff -dcrpN pgsql.orig/src/backend/port/posix_sema.c pgsql/src/backend/port/posix_sema.c
*** pgsql.orig/src/backend/port/posix_sema.c 2010-01-02 17:57:50.000000000 +0100
--- pgsql/src/backend/port/posix_sema.c 2010-07-29 11:58:56.000000000 +0200
***************
*** 24,29 ****
--- 24,30 ----
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
+ #include "storage/proc.h"
#ifdef USE_NAMED_POSIX_SEMAPHORES
*************** PGSemaphoreTryLock(PGSemaphore sema)
*** 313,315 ****
--- 314,341 ----
return true;
}
+
+ /*
+ * PGSemaphoreTimedLock
+ *
+ * Lock a semaphore (decrement count), blocking if count would be < 0.
+ * Unlike a plain blocking lock, this variant also stops waiting and
+ * returns once lock_timeout_detected has been set. In that case the
+ * semaphore may not have been acquired, so the caller must inspect
+ * lock_timeout_detected to tell the two outcomes apart.
+ *
+ * interruptOK: value assigned to ImmediateInterruptOK for the duration
+ * of each sem_wait() call.
+ */
+ void
+ PGSemaphoreTimedLock(PGSemaphore sema, bool interruptOK)
+ {
+ int errStatus;
+
+ /*
+ * Retry sem_wait() for as long as it fails with EINTR, unless
+ * lock_timeout_detected has been set in the meantime.
+ * ImmediateInterruptOK is restored to false after every attempt.
+ */
+ do
+ {
+ ImmediateInterruptOK = interruptOK;
+ CHECK_FOR_INTERRUPTS();
+ errStatus = sem_wait(PG_SEM_REF(sema));
+ ImmediateInterruptOK = false;
+ } while (errStatus < 0 && errno == EINTR && !lock_timeout_detected);
+
+ /* On lock timeout, return without reporting the sem_wait() result */
+ if (lock_timeout_detected)
+ return;
+ /* Any failure other than an interrupted wait is fatal */
+ if (errStatus < 0)
+ elog(FATAL, "sem_wait failed: %m");
+ }
diff -dcrpN pgsql.orig/src/backend/port/sysv_sema.c pgsql/src/backend/port/sysv_sema.c
*** pgsql.orig/src/backend/port/sysv_sema.c 2010-01-02 17:57:50.000000000 +0100
--- pgsql/src/backend/port/sysv_sema.c 2010-07-29 11:58:56.000000000 +0200
***************
*** 30,35 ****
--- 30,36 ----
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
+ #include "storage/proc.h"
#ifndef HAVE_UNION_SEMUN
*************** PGSemaphoreTryLock(PGSemaphore sema)
*** 497,499 ****
--- 498,530 ----
return true;
}
+
+ /*
+ * PGSemaphoreTimedLock
+ *
+ * Lock a semaphore (decrement count), blocking if count would be < 0.
+ * Unlike a plain blocking lock, this variant also stops waiting and
+ * returns once lock_timeout_detected has been set. In that case the
+ * semaphore may not have been acquired, so the caller must inspect
+ * lock_timeout_detected to tell the two outcomes apart.
+ *
+ * interruptOK: value assigned to ImmediateInterruptOK for the duration
+ * of each semop() call.
+ */
+ void
+ PGSemaphoreTimedLock(PGSemaphore sema, bool interruptOK)
+ {
+ int errStatus;
+ struct sembuf sops;
+
+ sops.sem_op = -1; /* decrement */
+ sops.sem_flg = 0;
+ sops.sem_num = sema->semNum;
+
+ /*
+ * Retry semop() for as long as it fails with EINTR, unless
+ * lock_timeout_detected has been set in the meantime.
+ * ImmediateInterruptOK is restored to false after every attempt.
+ */
+ do
+ {
+ ImmediateInterruptOK = interruptOK;
+ CHECK_FOR_INTERRUPTS();
+ errStatus = semop(sema->semId, &sops, 1);
+ ImmediateInterruptOK = false;
+ } while (errStatus < 0 && errno == EINTR && !lock_timeout_detected);
+
+ /* On lock timeout, return without reporting the semop() result */
+ if (lock_timeout_detected)
+ return;
+ /* Any failure other than an interrupted wait is fatal */
+ if (errStatus < 0)
+ elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
+ }
diff -dcrpN pgsql.orig/src/backend/port/win32_sema.c pgsql/src/backend/port/win32_sema.c
*** pgsql.orig/src/backend/port/win32_sema.c 2010-01-02 17:57:50.000000000 +0100
--- pgsql/src/backend/port/win32_sema.c 2010-07-29 11:58:56.000000000 +0200
***************
*** 16,21 ****
--- 16,22 ----
#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/pg_sema.h"
+ #include "storage/proc.h"
static HANDLE *mySemSet; /* IDs of sema sets acquired so far */
static int numSems; /* number of sema sets acquired so far */
*************** PGSemaphoreTryLock(PGSemaphore sema)
*** 205,207 ****
--- 206,263 ----
/* keep compiler quiet */
return false;
}
+
+ /*
+ * PGSemaphoreTimedLock
+ *
+ * Lock a semaphore (decrement count), blocking if count would be < 0.
+ * Serve the interrupt if interruptOK is true.
+ * Return if lock_timeout expired; the caller must check
+ * lock_timeout_detected to find out whether the wait ended that way.
+ *
+ * ImmediateInterruptOK is always reset to false before this function
+ * returns, whether the semaphore was acquired or the lock timeout
+ * was detected.
+ */
+ void
+ PGSemaphoreTimedLock(PGSemaphore sema, bool interruptOK)
+ {
+ DWORD ret;
+ HANDLE wh[2];
+
+ /* Wait on both the semaphore and the signal-arrival event */
+ wh[0] = *sema;
+ wh[1] = pgwin32_signal_event;
+
+ /*
+ * As in other implementations of PGSemaphoreLock, we need to check for
+ * cancel/die interrupts each time through the loop. But here, there is
+ * no hidden magic about whether the syscall will internally service a
+ * signal --- we do that ourselves.
+ */
+ do
+ {
+ ImmediateInterruptOK = interruptOK;
+ CHECK_FOR_INTERRUPTS();
+
+ errno = 0;
+ ret = WaitForMultipleObjectsEx(2, wh, FALSE, INFINITE, TRUE);
+
+ if (ret == WAIT_OBJECT_0)
+ {
+ /*
+ * We got it! Clear ImmediateInterruptOK before returning, so
+ * that it is not left set once the caller resumes work.
+ */
+ ImmediateInterruptOK = false;
+ return;
+ }
+ else if (ret == WAIT_OBJECT_0 + 1)
+ {
+ /* Signal event is set - we have a signal to deliver */
+ pgwin32_dispatch_queued_signals();
+ errno = EINTR;
+ }
+ else
+ /* Otherwise we are in trouble */
+ errno = EIDRM;
+
+ ImmediateInterruptOK = false;
+ } while (errno == EINTR && !lock_timeout_detected);
+
+ /* On lock timeout, return without treating the wait result as an error */
+ if (lock_timeout_detected)
+ return;
+ if (errno != 0)
+ ereport(FATAL,
+ (errmsg("could not lock semaphore: error code %d", (int) GetLastError())));
+ }
diff -dcrpN pgsql.orig/src/backend/postmaster/autovacuum.c pgsql/src/backend/postmaster/autovacuum.c
*** pgsql.orig/src/backend/postmaster/autovacuum.c 2010-04-29 12:09:03.000000000 +0200
--- pgsql/src/backend/postmaster/autovacuum.c 2010-08-02 09:36:21.000000000 +0200
*************** AutoVacWorkerMain(int argc, char *argv[]
*** 1521,1530 ****
SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
/*
! * Force statement_timeout to zero to avoid a timeout setting from
! * preventing regular maintenance from being executed.
*/
SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
/*
* Get the info about the database we're going to work on.
--- 1521,1531 ----
SetConfigOption("zero_damaged_pages", "false", PGC_SUSET, PGC_S_OVERRIDE);
/*
! * Force statement_timeout and lock_timeout to zero to avoid a timeout setting
! * from preventing regular maintenance from being executed.
*/
SetConfigOption("statement_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
+ SetConfigOption("lock_timeout", "0", PGC_SUSET, PGC_S_OVERRIDE);
/*
* Get the info about the database we're going to work on.
diff -dcrpN pgsql.orig/src/backend/storage/lmgr/lmgr.c pgsql/src/backend/storage/lmgr/lmgr.c
*** pgsql.orig/src/backend/storage/lmgr/lmgr.c 2010-01-02 17:57:52.000000000 +0100
--- pgsql/src/backend/storage/lmgr/lmgr.c 2010-07-29 11:58:56.000000000 +0200
***************
*** 19,26 ****
--- 19,29 ----
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
+ #include "catalog/pg_database.h"
#include "miscadmin.h"
#include "storage/lmgr.h"
+ #include "utils/lsyscache.h"
+ #include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/inval.h"
*************** LockRelationOid(Oid relid, LOCKMODE lock
*** 78,83 ****
--- 81,101 ----
res = LockAcquire(&tag, lockmode, false, false);
+ if (res == LOCKACQUIRE_NOT_AVAIL)
+ {
+ char *relname = get_rel_name(relid);
+ if (relname)
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on relation \"%s\"",
+ relname)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on relation with OID %u",
+ relid)));
+ }
+
/*
* Now that we have the lock, check for invalidation messages, so that we
* will update or flush any stale relcache entry before we try to use it.
*************** LockRelation(Relation relation, LOCKMODE
*** 173,178 ****
--- 191,202 ----
res = LockAcquire(&tag, lockmode, false, false);
+ if (res == LOCKACQUIRE_NOT_AVAIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
+ errmsg("could not obtain lock on relation \"%s\"",
+ RelationGetRelationName(relation))));
+
/*
* Now that we have the lock, check for invalidation messages; see notes
* in LockRelationOid.
*************** LockRelationIdForSession(LockRelId *reli
*** 250,256 ****
SET_LOCKTAG_RELATION(tag, relid->dbId, relid->relId);
! (void) LockAcquire(&tag, lockmode, true, false);
}
/*
--- 274,293 ----
SET_LOCKTAG_RELATION(tag, relid->dbId, relid->relId);
! if (LockAcquire(&tag, lockmode, true, false) == LOCKACQUIRE_NOT_AVAIL)
! {
! char *relname = get_rel_name(relid->relId);
! if (relname)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on relation \"%s\"",
! relname)));
! else
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on relation with OID %u",
! relid->relId)));
! }
}
/*
*************** LockRelationForExtension(Relation relati
*** 285,291 ****
relation->rd_lockInfo.lockRelId.dbId,
relation->rd_lockInfo.lockRelId.relId);
! (void) LockAcquire(&tag, lockmode, false, false);
}
/*
--- 322,332 ----
relation->rd_lockInfo.lockRelId.dbId,
relation->rd_lockInfo.lockRelId.relId);
! if (LockAcquire(&tag, lockmode, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on index \"%s\"",
! RelationGetRelationName(relation))));
}
/*
*************** LockPage(Relation relation, BlockNumber
*** 319,325 ****
relation->rd_lockInfo.lockRelId.relId,
blkno);
! (void) LockAcquire(&tag, lockmode, false, false);
}
/*
--- 360,370 ----
relation->rd_lockInfo.lockRelId.relId,
blkno);
! if (LockAcquire(&tag, lockmode, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on page %u of relation \"%s\"",
! blkno, RelationGetRelationName(relation))));
}
/*
*************** LockTuple(Relation relation, ItemPointer
*** 375,381 ****
ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid));
! (void) LockAcquire(&tag, lockmode, false, false);
}
/*
--- 420,430 ----
ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid));
! if (LockAcquire(&tag, lockmode, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on row in relation \"%s\"",
! RelationGetRelationName(relation))));
}
/*
*************** XactLockTableInsert(TransactionId xid)
*** 429,435 ****
SET_LOCKTAG_TRANSACTION(tag, xid);
! (void) LockAcquire(&tag, ExclusiveLock, false, false);
}
/*
--- 478,487 ----
SET_LOCKTAG_TRANSACTION(tag, xid);
! if (LockAcquire(&tag, ExclusiveLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on transaction with ID %u", xid)));
}
/*
*************** XactLockTableWait(TransactionId xid)
*** 473,479 ****
SET_LOCKTAG_TRANSACTION(tag, xid);
! (void) LockAcquire(&tag, ShareLock, false, false);
LockRelease(&tag, ShareLock, false);
--- 525,534 ----
SET_LOCKTAG_TRANSACTION(tag, xid);
! if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on transaction with ID %u", xid)));
LockRelease(&tag, ShareLock, false);
*************** VirtualXactLockTableInsert(VirtualTransa
*** 531,537 ****
SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
! (void) LockAcquire(&tag, ExclusiveLock, false, false);
}
/*
--- 586,596 ----
SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
! if (LockAcquire(&tag, ExclusiveLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on virtual transaction with ID %u",
! vxid.localTransactionId)));
}
/*
*************** VirtualXactLockTableWait(VirtualTransact
*** 549,555 ****
SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
! (void) LockAcquire(&tag, ShareLock, false, false);
LockRelease(&tag, ShareLock, false);
}
--- 608,618 ----
SET_LOCKTAG_VIRTUALTRANSACTION(tag, vxid);
! if (LockAcquire(&tag, ShareLock, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on virtual transaction with ID %u",
! vxid.localTransactionId)));
LockRelease(&tag, ShareLock, false);
}
*************** LockDatabaseObject(Oid classid, Oid obji
*** 598,604 ****
objid,
objsubid);
! (void) LockAcquire(&tag, lockmode, false, false);
}
/*
--- 661,671 ----
objid,
objsubid);
! if (LockAcquire(&tag, lockmode, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on class:object: %u:%u",
! classid, objid)));
}
/*
*************** LockSharedObject(Oid classid, Oid objid,
*** 636,642 ****
objid,
objsubid);
! (void) LockAcquire(&tag, lockmode, false, false);
/* Make sure syscaches are up-to-date with any changes we waited for */
AcceptInvalidationMessages();
--- 703,713 ----
objid,
objsubid);
! if (LockAcquire(&tag, lockmode, false, false) == LOCKACQUIRE_NOT_AVAIL)
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on class:object: %u:%u",
! classid, objid)));
/* Make sure syscaches are up-to-date with any changes we waited for */
AcceptInvalidationMessages();
*************** LockSharedObjectForSession(Oid classid,
*** 678,684 ****
objid,
objsubid);
! (void) LockAcquire(&tag, lockmode, true, false);
}
/*
--- 749,770 ----
objid,
objsubid);
! if (LockAcquire(&tag, lockmode, true, false) == LOCKACQUIRE_NOT_AVAIL)
! switch(classid)
! {
! case DatabaseRelationId:
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on database with ID %u",
! objid)));
! break;
! default:
! ereport(ERROR,
! (errcode(ERRCODE_LOCK_NOT_AVAILABLE),
! errmsg("could not obtain lock on class:object: %u:%u",
! classid, objid)));
! break;
! }
}
/*
diff -dcrpN pgsql.orig/src/backend/storage/lmgr/lock.c pgsql/src/backend/storage/lmgr/lock.c
*** pgsql.orig/src/backend/storage/lmgr/lock.c 2010-04-29 12:09:03.000000000 +0200
--- pgsql/src/backend/storage/lmgr/lock.c 2010-07-29 11:58:56.000000000 +0200
*************** PROCLOCK_PRINT(const char *where, const
*** 255,261 ****
static uint32 proclock_hash(const void *key, Size keysize);
static void RemoveLocalLock(LOCALLOCK *locallock);
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
! static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
PROCLOCK *proclock, LockMethod lockMethodTable);
static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
--- 255,261 ----
static uint32 proclock_hash(const void *key, Size keysize);
static void RemoveLocalLock(LOCALLOCK *locallock);
static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
! static int WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
PROCLOCK *proclock, LockMethod lockMethodTable);
static void CleanUpLock(LOCK *lock, PROCLOCK *proclock,
*************** ProcLockHashCode(const PROCLOCKTAG *proc
*** 447,453 ****
* dontWait: if true, don't wait to acquire lock
*
* Returns one of:
! * LOCKACQUIRE_NOT_AVAIL lock not available, and dontWait=true
* LOCKACQUIRE_OK lock successfully acquired
* LOCKACQUIRE_ALREADY_HELD incremented count for lock already held
*
--- 447,453 ----
* dontWait: if true, don't wait to acquire lock
*
* Returns one of:
! * LOCKACQUIRE_NOT_AVAIL lock not available, either dontWait=true or timeout
* LOCKACQUIRE_OK lock successfully acquired
* LOCKACQUIRE_ALREADY_HELD incremented count for lock already held
*
*************** LockAcquireExtended(const LOCKTAG *lockt
*** 833,839 ****
locktag->locktag_type,
lockmode);
! WaitOnLock(locallock, owner);
TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
locktag->locktag_field2,
--- 833,839 ----
locktag->locktag_type,
lockmode);
! status = WaitOnLock(locallock, owner);
TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
locktag->locktag_field2,
*************** LockAcquireExtended(const LOCKTAG *lockt
*** 848,867 ****
* done when the lock was granted to us --- see notes in WaitOnLock.
*/
! /*
! * Check the proclock entry status, in case something in the ipc
! * communication doesn't work correctly.
! */
! if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
{
! PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
! LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
! /* Should we retry ? */
! LWLockRelease(partitionLock);
! elog(ERROR, "LockAcquire failed");
}
- PROCLOCK_PRINT("LockAcquire: granted", proclock);
- LOCK_PRINT("LockAcquire: granted", lock, lockmode);
}
LWLockRelease(partitionLock);
--- 848,879 ----
* done when the lock was granted to us --- see notes in WaitOnLock.
*/
! switch (status)
{
! case STATUS_OK:
! /*
! * Check the proclock entry status, in case something in the ipc
! * communication doesn't work correctly.
! */
! if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
! {
! PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
! LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
! /* Should we retry ? */
! LWLockRelease(partitionLock);
! elog(ERROR, "LockAcquire failed");
! }
! PROCLOCK_PRINT("LockAcquire: granted", proclock);
! LOCK_PRINT("LockAcquire: granted", lock, lockmode);
! break;
! case STATUS_WAITING:
! PROCLOCK_PRINT("LockAcquire: timed out", proclock);
! LOCK_PRINT("LockAcquire: timed out", lock, lockmode);
! break;
! default:
! elog(ERROR, "LockAcquire invalid status");
! break;
}
}
LWLockRelease(partitionLock);
*************** LockAcquireExtended(const LOCKTAG *lockt
*** 887,893 ****
locktag->locktag_field2);
}
! return LOCKACQUIRE_OK;
}
/*
--- 899,905 ----
locktag->locktag_field2);
}
! return (status == STATUS_OK ? LOCKACQUIRE_OK : LOCKACQUIRE_NOT_AVAIL);
}
/*
*************** GrantAwaitedLock(void)
*** 1165,1178 ****
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process.
*
* The appropriate partition lock must be held at entry.
*/
! static void
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
{
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
char *volatile new_status = NULL;
LOCK_PRINT("WaitOnLock: sleeping on lock",
locallock->lock, locallock->tag.mode);
--- 1177,1196 ----
* Caller must have set MyProc->heldLocks to reflect locks already held
* on the lockable object by this process.
*
+ * Result: returns value of ProcSleep()
+ * STATUS_OK if we acquired the lock
+ * STATUS_ERROR if not (deadlock)
+ * STATUS_WAITING if not (timeout)
+ *
* The appropriate partition lock must be held at entry.
*/
! static int
WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
{
LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
LockMethod lockMethodTable = LockMethods[lockmethodid];
char *volatile new_status = NULL;
+ int wait_status;
LOCK_PRINT("WaitOnLock: sleeping on lock",
locallock->lock, locallock->tag.mode);
*************** WaitOnLock(LOCALLOCK *locallock, Resourc
*** 1214,1221 ****
*/
PG_TRY();
{
! if (ProcSleep(locallock, lockMethodTable) != STATUS_OK)
{
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit
* now.
--- 1232,1244 ----
*/
PG_TRY();
{
! wait_status = ProcSleep(locallock, lockMethodTable);
! switch (wait_status)
{
+ case STATUS_OK:
+ case STATUS_WAITING:
+ break;
+ default:
/*
* We failed as a result of a deadlock, see CheckDeadLock(). Quit
* now.
*************** WaitOnLock(LOCALLOCK *locallock, Resourc
*** 1260,1267 ****
pfree(new_status);
}
! LOCK_PRINT("WaitOnLock: wakeup on lock",
locallock->lock, locallock->tag.mode);
}
/*
--- 1283,1296 ----
pfree(new_status);
}
! if (wait_status == STATUS_OK)
! LOCK_PRINT("WaitOnLock: wakeup on lock",
! locallock->lock, locallock->tag.mode);
! else if (wait_status == STATUS_WAITING)
! LOCK_PRINT("WaitOnLock: timeout on lock",
locallock->lock, locallock->tag.mode);
+
+ return wait_status;
}
/*
diff -dcrpN pgsql.orig/src/backend/storage/lmgr/proc.c pgsql/src/backend/storage/lmgr/proc.c
*** pgsql.orig/src/backend/storage/lmgr/proc.c 2010-07-11 11:14:54.000000000 +0200
--- pgsql/src/backend/storage/lmgr/proc.c 2010-08-02 12:41:13.000000000 +0200
***************
*** 52,57 ****
--- 52,58 ----
/* GUC variables */
int DeadlockTimeout = 1000;
int StatementTimeout = 0;
+ int LockTimeout = 0;
bool log_lock_waits = false;
/* Pointer to this process's PGPROC struct, if any */
*************** static volatile bool statement_timeout_a
*** 79,98 ****
static volatile bool deadlock_timeout_active = false;
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
volatile bool cancel_from_timeout = false;
! /* timeout_start_time is set when log_lock_waits is true */
static TimestampTz timeout_start_time;
/* statement_fin_time is valid only if statement_timeout_active is true */
static TimestampTz statement_fin_time;
static TimestampTz statement_fin_time2; /* valid only in recovery */
static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
static bool CheckStatementTimeout(void);
static bool CheckStandbyTimeout(void);
/*
--- 80,106 ----
static volatile bool deadlock_timeout_active = false;
static volatile DeadLockState deadlock_state = DS_NOT_YET_CHECKED;
volatile bool cancel_from_timeout = false;
+ static volatile bool lock_timeout_active = false;
+ volatile bool lock_timeout_detected = false;
! /* timeout_start_time and timeout_fin_time are valid when deadlock_timeout_active is true */
static TimestampTz timeout_start_time;
+ static TimestampTz timeout_fin_time;
/* statement_fin_time is valid only if statement_timeout_active is true */
static TimestampTz statement_fin_time;
static TimestampTz statement_fin_time2; /* valid only in recovery */
+ /* lock_timeout_fin_time is valid only if lock_timeout_active is true */
+ static TimestampTz lock_timeout_fin_time;
static void RemoveProcFromArray(int code, Datum arg);
static void ProcKill(int code, Datum arg);
static void AuxiliaryProcKill(int code, Datum arg);
static bool CheckStatementTimeout(void);
+ static bool CheckLockTimeout(void);
static bool CheckStandbyTimeout(void);
+ static bool enable_sig_alarm_for_lock_timeout(int delayms);
/*
*************** ProcQueueInit(PROC_QUEUE *queue)
*** 797,803 ****
* The lock table's partition lock must be held at entry, and will be held
* at exit.
*
! * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
*
* ASSUME: that no one will fiddle with the queue until after
* we release the partition lock.
--- 805,814 ----
* The lock table's partition lock must be held at entry, and will be held
* at exit.
*
! * Result:
! * STATUS_OK if we acquired the lock
! * STATUS_ERROR if not (deadlock)
! * STATUS_WAITING if not (timeout)
*
* ASSUME: that no one will fiddle with the queue until after
* we release the partition lock.
*************** ProcSleep(LOCALLOCK *locallock, LockMeth
*** 951,957 ****
elog(FATAL, "could not set timer for process wakeup");
/*
! * If someone wakes us between LWLockRelease and PGSemaphoreLock,
* PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore
* implementation. While this is normally good, there are cases where a
* saved wakeup might be leftover from a previous operation (for example,
--- 962,976 ----
elog(FATAL, "could not set timer for process wakeup");
/*
! * Reset timer so we are awakened in case of lock timeout.
! * This doesn't modify the timer for deadlock check in case
! * the deadlock check happens earlier.
! */
! if (!enable_sig_alarm_for_lock_timeout(LockTimeout))
! elog(FATAL, "could not set timer for process wakeup");
!
! /*
! * If someone wakes us between LWLockRelease and PGSemaphoreTimedLock,
* PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore
* implementation. While this is normally good, there are cases where a
* saved wakeup might be leftover from a previous operation (for example,
*************** ProcSleep(LOCALLOCK *locallock, LockMeth
*** 969,975 ****
*/
do
{
! PGSemaphoreLock(&MyProc->sem, true);
/*
* waitStatus could change from STATUS_WAITING to something else
--- 988,994 ----
*/
do
{
! PGSemaphoreTimedLock(&MyProc->sem, true);
/*
* waitStatus could change from STATUS_WAITING to something else
*************** ProcSleep(LOCALLOCK *locallock, LockMeth
*** 1093,1099 ****
pfree(buf.data);
}
! } while (myWaitStatus == STATUS_WAITING);
/*
* Disable the timer, if it's still running
--- 1112,1118 ----
pfree(buf.data);
}
! } while (myWaitStatus == STATUS_WAITING && !lock_timeout_detected);
/*
* Disable the timer, if it's still running
*************** ProcSleep(LOCALLOCK *locallock, LockMeth
*** 1109,1114 ****
--- 1128,1141 ----
LWLockAcquire(partitionLock, LW_EXCLUSIVE);
/*
+ * If the lock timeout fired, we're not waiting anymore and
+ * we're not the one that the lock will be granted to.
+ * So remove ourselves from the wait queue.
+ */
+ if (lock_timeout_detected)
+ RemoveFromWaitQueue(MyProc, hashcode);
+
+ /*
* We no longer want LockWaitCancel to do anything.
*/
lockAwaited = NULL;
*************** ProcSleep(LOCALLOCK *locallock, LockMeth
*** 1122,1129 ****
/*
* We don't have to do anything else, because the awaker did all the
* necessary update of the lock table and MyProc.
*/
! return MyProc->waitStatus;
}
--- 1149,1158 ----
/*
* We don't have to do anything else, because the awaker did all the
* necessary update of the lock table and MyProc.
+ * RemoveFromWaitQueue() has set MyProc->waitStatus = STATUS_ERROR,
+ * so we need to distinguish this case.
*/
! return (lock_timeout_detected ? STATUS_WAITING : MyProc->waitStatus);
}
*************** CheckDeadLock(void)
*** 1301,1306 ****
--- 1330,1352 ----
RemoveFromWaitQueue(MyProc, LockTagHashCode(&(MyProc->waitLock->tag)));
/*
+ * We found a deadlock, we already removed ourselves from
+ * the wait queue above. Disable the lock_timeout check,
+ * so RemoveFromWaitQueue() is not called again. This can happen
+ * in the case when deadlock_timeout and lock_timeout are so close
+ * that the system's clock resolution effectively makes them equal,
+ * so the checks below are both true in the same signal handler:
+ *
+ * TimestampTz now = GetCurrentTimestamp()
+ *
+ * if (timeout_fin_time <= now) ...
+ *
+ * if (lock_timeout_fin_time <= now) ...
+ *
+ */
+ lock_timeout_active = false;
+
+ /*
* Unlock my semaphore so that the interrupted ProcSleep() call can
* finish.
*/
*************** enable_sig_alarm(int delayms, bool is_st
*** 1462,1479 ****
* than normal, but that does no harm.
*/
timeout_start_time = GetCurrentTimestamp();
! fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
! deadlock_timeout_active = true;
! if (fin_time >= statement_fin_time)
return true;
}
else
{
/* Begin deadlock timeout with no statement-level timeout */
deadlock_timeout_active = true;
! /* GetCurrentTimestamp can be expensive, so only do it if we must */
! if (log_lock_waits)
! timeout_start_time = GetCurrentTimestamp();
}
/* If we reach here, okay to set the timer interrupt */
--- 1508,1533 ----
* than normal, but that does no harm.
*/
timeout_start_time = GetCurrentTimestamp();
! timeout_fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
!
! /*
! * Activate deadlock_timeout only if it should happen earlier
! * than statement_timeout.
! */
! if (timeout_fin_time >= statement_fin_time)
return true;
+ deadlock_timeout_active = true;
}
else
{
/* Begin deadlock timeout with no statement-level timeout */
deadlock_timeout_active = true;
! /*
! * Computing the timeout_fin_time is needed because
! * the lock timeout logic checks for it.
! */
! timeout_start_time = GetCurrentTimestamp();
! timeout_fin_time = TimestampTzPlusMilliseconds(timeout_start_time, delayms);
}
/* If we reach here, okay to set the timer interrupt */
*************** enable_sig_alarm(int delayms, bool is_st
*** 1486,1491 ****
--- 1540,1618 ----
}
/*
+ * Enable the SIGALRM interrupt to fire after the specified delay
+ * in case LockTimeout is set.
+ *
+ * This code properly handles nesting of lock_timeout timeout alarm
+ * within deadlock timeout and statement timeout alarms.
+ *
+ * delayms is the lock timeout in milliseconds. lock_timeout_detected is
+ * always cleared on entry. Whenever lock_timeout_active is set by this
+ * function, lock_timeout_fin_time is set as well, including the case
+ * where the timer itself is left alone because the deadlock timeout
+ * fires first.
+ *
+ * Returns TRUE if okay, FALSE on failure.
+ */
+ static bool
+ enable_sig_alarm_for_lock_timeout(int delayms)
+ {
+ struct itimerval timeval;
+ TimestampTz fin_time;
+
+ lock_timeout_detected = false;
+ if (LockTimeout == 0)
+ return true;
+
+ if (deadlock_timeout_active)
+ /*
+ * ensure the same starting time for deadlock_timeout and lock_timeout
+ */
+ fin_time = timeout_start_time;
+ else
+ fin_time = GetCurrentTimestamp();
+ fin_time = TimestampTzPlusMilliseconds(fin_time, delayms);
+
+ if (statement_timeout_active)
+ {
+ /*
+ * If statement_timeout is active and should happen before us
+ * then don't bother setting up lock_timeout. statement_timeout
+ * may span over multiple acquired locks during the same statement
+ * so leave it in place.
+ */
+ if (fin_time >= statement_fin_time)
+ return true;
+ }
+
+ if (deadlock_timeout_active)
+ {
+ /*
+ * If deadlock_timeout is active but happens earlier then
+ * don't modify the timer but set lock_timeout_active
+ * so the timer will be re-set when deadlock_timeout triggers.
+ */
+ if (fin_time >= timeout_fin_time)
+ {
+ /*
+ * lock_timeout_fin_time must be valid whenever
+ * lock_timeout_active is true, so record the deadline here;
+ * otherwise a stale or zero value would be compared against
+ * the current time when the timer fires.
+ */
+ lock_timeout_fin_time = fin_time;
+ lock_timeout_active = true;
+ return true;
+ }
+ /*
+ * On the other hand, if deadlock_timeout should happen later
+ * than lock_timeout, disable it. Life span of deadlock_timeout and
+ * lock_timeout is the same.
+ */
+ else
+ deadlock_timeout_active = false;
+ }
+
+ /* If we reach here, okay to set the timer interrupt */
+ MemSet(&timeval, 0, sizeof(struct itimerval));
+ timeval.it_value.tv_sec = delayms / 1000;
+ timeval.it_value.tv_usec = (delayms % 1000) * 1000;
+ if (setitimer(ITIMER_REAL, &timeval, NULL))
+ return false;
+
+ lock_timeout_fin_time = fin_time;
+ lock_timeout_active = true;
+ return true;
+ }
+
+ /*
* Cancel the SIGALRM timer, either for a deadlock timeout or a statement
* timeout. If a deadlock timeout is canceled, any active statement timeout
* remains in force.
*************** disable_sig_alarm(bool is_statement_time
*** 1502,1508 ****
*
* We will re-enable the interrupt if necessary in CheckStatementTimeout.
*/
! if (statement_timeout_active || deadlock_timeout_active)
{
struct itimerval timeval;
--- 1629,1635 ----
*
* We will re-enable the interrupt if necessary in CheckStatementTimeout.
*/
! if (statement_timeout_active || deadlock_timeout_active || lock_timeout_active)
{
struct itimerval timeval;
*************** disable_sig_alarm(bool is_statement_time
*** 1512,1517 ****
--- 1639,1646 ----
statement_timeout_active = false;
cancel_from_timeout = false;
deadlock_timeout_active = false;
+ lock_timeout_active = false;
+ lock_timeout_detected = false;
return false;
}
}
*************** disable_sig_alarm(bool is_statement_time
*** 1519,1524 ****
--- 1648,1656 ----
/* Always cancel deadlock timeout, in case this is error cleanup */
deadlock_timeout_active = false;
+ /* Ditto for lock_timeout */
+ lock_timeout_active = false;
+
/* Cancel or reschedule statement timeout */
if (is_statement_timeout)
{
*************** CheckStatementTimeout(void)
*** 1590,1595 ****
--- 1722,1777 ----
/*
+ * Check for lock timeout. If the timeout time has come,
+ * indicate it; if not, reschedule the SIGALRM interrupt to occur
+ * at the right time.
+ *
+ * Returns true if okay, false if failed to set the interrupt.
+ */
+ static bool
+ CheckLockTimeout(void)
+ {
+ TimestampTz now;
+
+ if (!lock_timeout_active)
+ return true; /* do nothing if not active */
+
+ now = GetCurrentTimestamp();
+
+ if (now >= lock_timeout_fin_time)
+ {
+ /* Deadline reached: disarm and record that the lock timeout fired */
+ lock_timeout_active = false;
+ lock_timeout_detected = true;
+ }
+ else
+ {
+ /* Not time yet, so (re)schedule the interrupt */
+ long secs;
+ int usecs;
+ struct itimerval timeval;
+
+ /* Measure against the lock deadline, not statement_fin_time */
+ TimestampDifference(now, lock_timeout_fin_time, &secs, &usecs);
+
+ /*
+ * It's possible that the difference is less than a microsecond;
+ * a zero it_value would cancel, rather than set, the interrupt.
+ */
+ if (secs == 0 && usecs == 0)
+ usecs = 1;
+ MemSet(&timeval, 0, sizeof(struct itimerval));
+ timeval.it_value.tv_sec = secs;
+ timeval.it_value.tv_usec = usecs;
+ if (setitimer(ITIMER_REAL, &timeval, NULL))
+ return false;
+ }
+
+ return true;
+ }
+
+
+ /*
* Signal handler for SIGALRM for normal user backends
*
* Process deadlock check and/or statement timeout check, as needed.
*************** handle_sig_alarm(SIGNAL_ARGS)
*** 1608,1613 ****
--- 1790,1798 ----
CheckDeadLock();
}
+ if (lock_timeout_active)
+ (void) CheckLockTimeout();
+
if (statement_timeout_active)
(void) CheckStatementTimeout();
diff -dcrpN pgsql.orig/src/backend/utils/misc/guc.c pgsql/src/backend/utils/misc/guc.c
*** pgsql.orig/src/backend/utils/misc/guc.c 2010-07-26 10:05:55.000000000 +0200
--- pgsql/src/backend/utils/misc/guc.c 2010-07-29 11:58:56.000000000 +0200
*************** static struct config_int ConfigureNamesI
*** 1648,1653 ****
--- 1648,1663 ----
},
{
+ {"lock_timeout", PGC_USERSET, CLIENT_CONN_STATEMENT,
+ gettext_noop("Sets the maximum allowed timeout for any lock taken by a statement."),
+ gettext_noop("A value of 0 turns off the timeout."),
+ GUC_UNIT_MS
+ },
+ &LockTimeout,
+ 0, 0, INT_MAX, NULL, NULL
+ },
+
+ {
{"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT,
gettext_noop("Minimum age at which VACUUM should freeze a table row."),
NULL
diff -dcrpN pgsql.orig/src/backend/utils/misc/postgresql.conf.sample pgsql/src/backend/utils/misc/postgresql.conf.sample
*** pgsql.orig/src/backend/utils/misc/postgresql.conf.sample 2010-07-26 10:05:55.000000000 +0200
--- pgsql/src/backend/utils/misc/postgresql.conf.sample 2010-07-29 11:58:56.000000000 +0200
***************
*** 492,497 ****
--- 492,500 ----
#------------------------------------------------------------------------------
#deadlock_timeout = 1s
+ #lock_timeout = 0 # timeout value for heavy-weight locks
+ # taken by statements. 0 disables timeout
+ # unit in milliseconds, default is 0
#max_locks_per_transaction = 64 # min 10
# (change requires restart)
# Note: Each lock table slot uses ~270 bytes of shared memory, and there are
diff -dcrpN pgsql.orig/src/include/storage/pg_sema.h pgsql/src/include/storage/pg_sema.h
*** pgsql.orig/src/include/storage/pg_sema.h 2010-01-02 17:58:08.000000000 +0100
--- pgsql/src/include/storage/pg_sema.h 2010-07-29 11:58:56.000000000 +0200
*************** extern void PGSemaphoreUnlock(PGSemaphor
*** 80,83 ****
--- 80,86 ----
/* Lock a semaphore only if able to do so without blocking */
extern bool PGSemaphoreTryLock(PGSemaphore sema);
+ /* Lock a semaphore (decrement count), blocking if count would be < 0 */
+ extern void PGSemaphoreTimedLock(PGSemaphore sema, bool interruptOK);
+
#endif /* PG_SEMA_H */
diff -dcrpN pgsql.orig/src/include/storage/proc.h pgsql/src/include/storage/proc.h
*** pgsql.orig/src/include/storage/proc.h 2010-07-11 11:15:00.000000000 +0200
--- pgsql/src/include/storage/proc.h 2010-07-29 11:58:56.000000000 +0200
*************** typedef struct PROC_HDR
*** 163,170 ****
--- 163,172 ----
/* configurable options */
extern int DeadlockTimeout;
extern int StatementTimeout;
+ extern int LockTimeout;
extern bool log_lock_waits;
+ extern volatile bool lock_timeout_detected;
extern volatile bool cancel_from_timeout;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers