Tom Lane wrote:
No, I don't believe we should be messing with the semantics of
try_relation_open.  It is what it is.

With only four pretty simple callers to the thing, and two of them needing the alternate behavior, it seemed a reasonable place to modify to me. I thought the "nowait" boolean idea was in enough places that it was reasonable to attach to try_relation_open.

Attached patch solves the "wait for lock forever" problem, and introduces a new log message when AV or auto-analyze fail to obtain a lock on something that needs to be cleaned up:

DEBUG:  autovacuum: processing database "gsmith"
INFO:  autovacuum skipping relation 65563 --- cannot open or obtain lock
INFO:  autoanalyze skipping relation 65563 --- cannot open or obtain lock

My main concern is that this may cause AV to constantly fail to get access to a busy table, where in the current code it would queue up and eventually get the lock needed. A secondary issue is that while the autovacuum messages only show up if you have log_autovacuum_min_duration set to not -1, the autoanalyze ones can't be stopped.

If you don't like the way I structured the code, you can certainly do it some other way instead. I thought this approach was really simple and not unlike similar code elsewhere.

Here's the test case that worked for me here again:

psql
SHOW log_autovacuum_min_duration;
DROP TABLE t;
CREATE TABLE t(s serial,i integer);
INSERT INTO t(i) SELECT generate_series(1,100000);
SELECT relname,last_autovacuum,autovacuum_count FROM pg_stat_user_tables WHERE relname='t';
DELETE FROM t WHERE s<50000;
\q
psql
BEGIN;
LOCK t;

Leave that open, then go to anther session with old "tail -f" on the logs to wait for the errors to show up.

--
Greg Smith   2ndQuadrant US    g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us
"PostgreSQL 9.0 High Performance": http://www.2ndQuadrant.com/books

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 25d9fde..4193fff 100644
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
*************** relation_open(Oid relationId, LOCKMODE l
*** 918,936 ****
   *
   *		Same as relation_open, except return NULL instead of failing
   *		if the relation does not exist.
   * ----------------
   */
  Relation
! try_relation_open(Oid relationId, LOCKMODE lockmode)
  {
  	Relation	r;
! 
  	Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
  
  	/* Get the lock first */
  	if (lockmode != NoLock)
! 		LockRelationOid(relationId, lockmode);
! 
  	/*
  	 * Now that we have the lock, probe to see if the relation really exists
  	 * or not.
--- 918,951 ----
   *
   *		Same as relation_open, except return NULL instead of failing
   *		if the relation does not exist.
+  *
+  *		If called with nowait enabled, this will immediately exit
+  *		if a lock is requested and it can't be acquired.  The
+  *		return code in this case doesn't distinguish between this
+  *		situation and the one where the relation was locked, but
+  *		doesn't exist.  Callers using nowait must not care that
+  *		they won't be able to tell the difference.
   * ----------------
   */
  Relation
! try_relation_open(Oid relationId, LOCKMODE lockmode, bool nowait)
  {
  	Relation	r;
! 	bool		locked;
  	Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES);
  
  	/* Get the lock first */
  	if (lockmode != NoLock)
! 		{
! 		if (nowait)
! 			{
! 			locked=ConditionalLockRelationOid(relationId, lockmode);
! 			if (!locked)
! 				return NULL;									
! 			}
! 		else
! 			LockRelationOid(relationId, lockmode);
! 		}
  	/*
  	 * Now that we have the lock, probe to see if the relation really exists
  	 * or not.
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 7bc5f11..24bfb16 100644
*** a/src/backend/commands/analyze.c
--- b/src/backend/commands/analyze.c
*************** analyze_rel(Oid relid, VacuumStmt *vacst
*** 147,156 ****
  	 * concurrent VACUUM, which doesn't matter much at the moment but might
  	 * matter if we ever try to accumulate stats on dead tuples.) If the rel
  	 * has been dropped since we last saw it, we don't need to process it.
  	 */
! 	onerel = try_relation_open(relid, ShareUpdateExclusiveLock);
  	if (!onerel)
! 		return;
  
  	/*
  	 * Check permissions --- this should match vacuum's check!
--- 147,168 ----
  	 * concurrent VACUUM, which doesn't matter much at the moment but might
  	 * matter if we ever try to accumulate stats on dead tuples.) If the rel
  	 * has been dropped since we last saw it, we don't need to process it.
+ 	 *
+ 	 * If this is a manual analyze, opening will wait forever to acquire
+ 	 * the requested lock on the relation.  Autovacuum will just give up
+ 	 * immediately if it can't get the lock.  This prevents a series of locked
+ 	 * relations from potentially hanging all of the AV workers waiting
+ 	 * for locks.
  	 */
! 	onerel = try_relation_open(relid, ShareUpdateExclusiveLock, IsAutoVacuumWorkerProcess());
  	if (!onerel)
! 		{
! 			if (IsAutoVacuumWorkerProcess())
! 				ereport(INFO,
! 					(errmsg("autoanalyze skipping relation %d --- cannot open or obtain lock",
! 						relid)));
! 			return;
! 		}
  
  	/*
  	 * Check permissions --- this should match vacuum's check!
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 19c3cf9..b5d5caa 100644
*** a/src/backend/commands/cluster.c
--- b/src/backend/commands/cluster.c
*************** cluster_rel(Oid tableOid, Oid indexOid, 
*** 276,282 ****
  	 * case, since cluster() already did it.)  The index lock is taken inside
  	 * check_index_is_clusterable.
  	 */
! 	OldHeap = try_relation_open(tableOid, AccessExclusiveLock);
  
  	/* If the table has gone away, we can skip processing it */
  	if (!OldHeap)
--- 276,282 ----
  	 * case, since cluster() already did it.)  The index lock is taken inside
  	 * check_index_is_clusterable.
  	 */
! 	OldHeap = try_relation_open(tableOid, AccessExclusiveLock,false);
  
  	/* If the table has gone away, we can skip processing it */
  	if (!OldHeap)
diff --git a/src/backend/commands/lockcmds.c b/src/backend/commands/lockcmds.c
index b2c6ea5..488504d 100644
*** a/src/backend/commands/lockcmds.c
--- b/src/backend/commands/lockcmds.c
*************** LockTableRecurse(Oid reloid, RangeVar *r
*** 106,112 ****
  	 * Now that we have the lock, check to see if the relation really exists
  	 * or not.
  	 */
! 	rel = try_relation_open(reloid, NoLock);
  
  	if (!rel)
  	{
--- 106,112 ----
  	 * Now that we have the lock, check to see if the relation really exists
  	 * or not.
  	 */
! 	rel = try_relation_open(reloid, NoLock,false);
  
  	if (!rel)
  	{
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 9098c5d..cb5b0e0 100644
*** a/src/backend/commands/vacuum.c
--- b/src/backend/commands/vacuum.c
*************** vacuum_rel(Oid relid, VacuumStmt *vacstm
*** 844,856 ****
  	 *
  	 * There's a race condition here: the rel may have gone away since the
  	 * last time we saw it.  If so, we don't need to vacuum it.
  	 */
! 	onerel = try_relation_open(relid, lmode);
  
  	if (!onerel)
  	{
  		PopActiveSnapshot();
  		CommitTransactionCommand();
  		return;
  	}
  
--- 844,868 ----
  	 *
  	 * There's a race condition here: the rel may have gone away since the
  	 * last time we saw it.  If so, we don't need to vacuum it.
+ 	 * 
+ 	 * If this is a manual vacuum, opening will wait forever to acquire
+ 	 * the requested lock on the relation.  Autovacuum will just give up
+ 	 * immediately if it can't get the lock.  This prevents a series of locked
+ 	 * relations from potentially hanging all of the AV workers waiting
+ 	 * for locks.
  	 */
! 	onerel = try_relation_open(relid, lmode, IsAutoVacuumWorkerProcess());
  
  	if (!onerel)
  	{
  		PopActiveSnapshot();
  		CommitTransactionCommand();
+ 		if (IsAutoVacuumWorkerProcess() && Log_autovacuum_min_duration >= 0)
+ 		{
+ 		ereport(INFO,
+ 			(errmsg("autovacuum skipping relation %d --- cannot open or obtain lock",
+ 				relid)));
+ 		}
  		return;
  	}
  
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 9efab4c..981dfe8 100644
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
*************** typedef enum
*** 48,54 ****
  
  /* in heap/heapam.c */
  extern Relation relation_open(Oid relationId, LOCKMODE lockmode);
! extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode);
  extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
  extern Relation try_relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
  extern void relation_close(Relation relation, LOCKMODE lockmode);
--- 48,54 ----
  
  /* in heap/heapam.c */
  extern Relation relation_open(Oid relationId, LOCKMODE lockmode);
! extern Relation try_relation_open(Oid relationId, LOCKMODE lockmode, bool nowait);
  extern Relation relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
  extern Relation try_relation_openrv(const RangeVar *relation, LOCKMODE lockmode);
  extern void relation_close(Relation relation, LOCKMODE lockmode);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to