Tom Lane wrote:
No, I don't believe we should be messing with the semantics of
try_relation_open. It is what it is.
With only four pretty simple callers to the thing, and two of them
needing the alternate behavior, it seemed a reasonable place to modify
to me. I thought the "nowait" boolean idea was in enough places that it
was reasonable to attach to try_relation_open.
Attached patch solves the "wait for lock forever" problem, and
introduces a new log message when AV or auto-analyze fail to obtain a
lock on something that needs to be cleaned up:
DEBUG: autovacuum: processing database "gsmith"
INFO: autovacuum skipping relation 65563 --- cannot open or obtain lock
INFO: autoanalyze skipping relation 65563 --- cannot open or obtain lock
My main concern is that this may cause AV to constantly fail to get
access to a busy table, where in the current code it would queue up and
eventually get the lock needed. A secondary issue is that while the
autovacuum messages only show up if you have log_autovacuum_min_duration
set to not -1, the autoanalyze ones can't be stopped.
If you don't like the way I structured the code, you can certainly do it
some other way instead. I thought this approach was really simple and
not unlike similar code elsewhere.
Here's the test case that worked for me here again:
psql
SHOW log_autovacuum_min_duration;
DROP TABLE t;
CREATE TABLE t(s serial,i integer);
INSERT INTO t(i) SELECT generate_series(1,100000);
SELECT relname,last_autovacuum,autovacuum_count FROM pg_stat_user_tables
WHERE relname='t';
DELETE FROM t WHERE s<50000;
\q
psql
BEGIN;
LOCK t;
Leave that open, then go to anther session with old "tail -f" on the
logs to wait for the errors to show up.
--
Greg Smith 2ndQuadrant US g...@2ndquadrant.com Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.us
"PostgreSQL 9.0 High Performance": http://www.2ndQuadrant.com/books
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 25d9fde..4193fff 100644
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** relation_open(Oid relationId, LOCKMODE l
*** 918,936 ****
*
* Same as relation_open, except return NULL instead of failing
* if the relation does not exist.
* ----------------
*/
Relation
! try_relation_open(Oid relationId, LOCKMODE lockmode)
{
Relation r;
!
Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
/* Get the lock first */
if (lockmode != NoLock)
! LockRelationOid(relationId, lockmode);
!
/*
* Now that we have the lock, probe to see if the relation really exists
* or not.
--- 918,951 ----
*
* Same as relation_open, except return NULL instead of failing
* if the relation does not exist.
+ *
+ * If called with nowait enabled, this will immediately exit
+ * if a lock is requested and it can't be acquired. The
+ * return code in this case doesn't distinguish between this
+ * situation and the one where the relation was locked, but
+ * doesn't exist. Callers using nowait must not care that
+ * they won't be able to tell the difference.
* ----------------
*/
Relation
! try_relation_open(Oid relationId, LOCKMODE lockmode, bool nowait)
{
Relation r;
! bool locked;
Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
/* Get the lock first */
if (lockmode != NoLock)
! {
! if (nowait)
! {
! locked=ConditionalLockRelationOid(relationId, lockmode);
! if (!locked)
! return NULL;
! }
! else
! LockRelationOid(relationId, lockmode);
! }
/*
* Now that we have the lock, probe to see if the relation really exists
* or not.
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 7bc5f11..24bfb16 100644
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
*************** analyze_rel(Oid relid, VacuumStmt *vacst
*** 147,156 ****
* concurrent VACUUM, which doesn't matter much at the moment but might
* matter if we ever try to accumulate stats on dead tuples.) If the rel
* has been dropped since we last saw it, we don't need to process it.
*/
! onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
if (!onerel)
! return;
/*
* Check permissions --- this should match vacuum's check!
--- 147,168 ----
* concurrent VACUUM, which doesn't matter much at the moment but might
* matter if we ever try to accumulate stats on dead tuples.) If the rel
* has been dropped since we last saw it, we don't need to process it.
+ *
+ * If this is a manual analyze, opening will wait forever to acquire
+ * the requested lock on the relation. Autovacuum will just give up
+ * immediately if it can't get the lock. This prevents a series of locked
+ * relations from potentially hanging all of the AV workers waiting
+ * for locks.
*/
! onerel = try_relation_open(relid, ShareUpdateExclusiveLock, IsAutoVacuumWorkerProcess());
if (!onerel)
! {
! if (IsAutoVacuumWorkerProcess())
! ereport(INFO,
! (errmsg("autoanalyze skipping relation %d --- cannot open or obtain lock",
! relid)));
! return;
! }
/*
* Check permissions --- this should match vacuum's check!
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 19c3cf9..b5d5caa 100644
*** a/src/backend/commands/cluster.c
--- b/src/backend/commands/cluster.c
*************** cluster_rel(Oid tableOid, Oid indexOid,
*** 276,282 ****
* case, since cluster() already did it.) The index lock is taken inside
* check_index_is_clusterable.
*/
! OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
/* If the table has gone away, we can skip processing it */
if (!OldHeap)
--- 276,282 ----
* case, since cluster() already did it.) The index lock is taken inside
* check_index_is_clusterable.
*/
! OldHeap = try_relation_open(tableOid, AccessExclusiveLock,false);
/* If the table has gone away, we can skip processing it */
if (!OldHeap)
diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c
index b2c6ea5..488504d 100644
*** a/src/backend/commands/lockcmds.c
--- b/src/backend/commands/lockcmds.c
*************** LockTableRecurse(Oid reloid, RangeVar *r
*** 106,112 ****
* Now that we have the lock, check to see if the relation really exists
* or not.
*/
! rel = try_relation_open(reloid, NoLock);
if (!rel)
{
--- 106,112 ----
* Now that we have the lock, check to see if the relation really exists
* or not.
*/
! rel = try_relation_open(reloid, NoLock,false);
if (!rel)
{
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 9098c5d..cb5b0e0 100644
*** a/src/backend/commands/vacuum.c
--- b/src/backend/commands/vacuum.c
*************** vacuum_rel(Oid relid, VacuumStmt *vacstm
*** 844,856 ****
*
* There's a race condition here: the rel may have gone away since the
* last time we saw it. If so, we don't need to vacuum it.
*/
! onerel = try_relation_open(relid, lmode);
if (!onerel)
{
PopActiveSnapshot();
CommitTransactionCommand();
return;
}
--- 844,868 ----
*
* There's a race condition here: the rel may have gone away since the
* last time we saw it. If so, we don't need to vacuum it.
+ *
+ * If this is a manual vacuum, opening will wait forever to acquire
+ * the requested lock on the relation. Autovacuum will just give up
+ * immediately if it can't get the lock. This prevents a series of locked
+ * relations from potentially hanging all of the AV workers waiting
+ * for locks.
*/
! onerel = try_relation_open(relid, lmode, IsAutoVacuumWorkerProcess());
if (!onerel)
{
PopActiveSnapshot();
CommitTransactionCommand();
+ if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
+ {
+ ereport(INFO,
+ (errmsg("autovacuum skipping relation %d --- cannot open or obtain lock",
+ relid)));
+ }
return;
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 9efab4c..981dfe8 100644
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
*************** typedef enum
*** 48,54 ****
/* in heap/heapam.c */
extern Relation relation_open(Oid relationId, LOCKMODE lockmode);
! extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode);
extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
extern Relation try_relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
extern void relation_close(Relation relation, LOCKMODE lockmode);
--- 48,54 ----
/* in heap/heapam.c */
extern Relation relation_open(Oid relationId, LOCKMODE lockmode);
! extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode, bool nowait);
extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
extern Relation try_relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
extern void relation_close(Relation relation, LOCKMODE lockmode);
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers