Hi

2021年1月4日(月) 15:04 torikoshia <torikos...@oss.nttdata.com>:
>
> On 2021-01-02 06:49, Justin Pryzby wrote:
> > On Tue, Dec 15, 2020 at 11:47:23AM +0900, torikoshia wrote:
> >> So I'm now thinking about adding a new column in pg_locks which
> >> keeps the time at which locks started waiting.
> >>
> >> Attached a patch.
> >
> > This is failing make check-world, would you send an updated patch ?
> >
> > I added you as an author so it shows up here.
> > http://cfbot.cputube.org/atsushi-torikoshi.html
>
> Thanks!
>
> Attached an updated patch.

I took a look at this patch as it seems useful (and I have an item on my
bucket
list labelled "look at the locking code", which I am not at all familiar
with).

I tested the patch by doing the following:

Session 1:

    postgres=# CREATE TABLE foo (id int);
    CREATE TABLE

    postgres=# BEGIN ;
    BEGIN

    postgres=*# INSERT INTO foo VALUES (1);
    INSERT 0 1

Session 2:

    postgres=# BEGIN ;
    BEGIN

    postgres=*# LOCK TABLE foo;

Session 3:

    postgres=# SELECT locktype, relation, pid, mode, granted, fastpath,
wait_start
                 FROM pg_locks
                WHERE relation = 'foo'::regclass AND NOT granted\x\g\x

    -[ RECORD 1 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3513935
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:03:06.683053+09

So far so good, but checking *all* the locks on this relation:

    postgres=# SELECT locktype, relation, pid, mode, granted, fastpath,
wait_start
                 FROM pg_locks
                WHERE relation = 'foo'::regclass\x\g\x

    -[ RECORD 1 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3513824
    mode       | RowExclusiveLock
    granted    | t
    fastpath   | f
    wait_start | 2021-01-14 12:03:06.683053+09
    -[ RECORD 2 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3513935
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:03:06.683053+09

shows the RowExclusiveLock granted in session 1 as apparently waiting from
exactly the same time as session 2's attempt to acquire the lock, which is
clearly
not right.

Also, if a further session attempts to acquire a lock, we get:

    postgres=# SELECT locktype, relation, pid, mode, granted, fastpath,
wait_start
                 FROM pg_locks
                WHERE relation = 'foo'::regclass\x\g\x

    -[ RECORD 1 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3513824
    mode       | RowExclusiveLock
    granted    | t
    fastpath   | f
    wait_start | 2021-01-14 12:05:53.747309+09
    -[ RECORD 2 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3514240
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:05:53.747309+09
    -[ RECORD 3 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3513935
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:05:53.747309+09

i.e. all entries now have "wait_start" set to the start time of the latest
session's
lock acquisition attempt.

Looking at the code, this happens as the wait start time is being recorded
in
the lock record itself, so always contains the value reported by the latest
lock
acquisition attempt.

It looks like the logical place to store the value is in the PROCLOCK
structure; the attached patch reworks your patch to do that, and given the
above
scenario produces following output:

    postgres=# SELECT locktype, relation, pid, mode, granted, fastpath,
wait_start
                 FROM pg_locks
                WHERE relation = 'foo'::regclass\x\g\x

    -[ RECORD 1 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3516391
    mode       | RowExclusiveLock
    granted    | t
    fastpath   | f
    wait_start |
    -[ RECORD 2 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3516470
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:19:16.217163+09
    -[ RECORD 3 ]-----------------------------
    locktype   | relation
    relation   | 16452
    pid        | 3516968
    mode       | AccessExclusiveLock
    granted    | f
    fastpath   | f
    wait_start | 2021-01-14 12:18:08.195429+09

As mentioned, I'm not at all familiar with the locking code so it's quite
possible that it's incomplete,there may be non-obvious side-effects, or it's
the wrong approach altogether etc., so further review is necessary.


Regards

Ian Barwick

-- 
EnterpriseDB: https://www.enterprisedb.com
diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out
index 13848b7449..c0aecb0288 100644
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@@ -97,8 +97,8 @@ SELECT bt_index_parent_check('bttest_b_idx');
 SELECT * FROM pg_locks
 WHERE relation = ANY(ARRAY['bttest_a', 'bttest_a_idx', 'bttest_b', 'bttest_b_idx']::regclass[])
     AND pid = pg_backend_pid();
- locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath 
-----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+----------
+ locktype | database | relation | page | tuple | virtualxid | transactionid | classid | objid | objsubid | virtualtransaction | pid | mode | granted | fastpath | wait_start 
+----------+----------+----------+------+-------+------------+---------------+---------+-------+----------+--------------------+-----+------+---------+----------+------------
 (0 rows)
 
 COMMIT;
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 3a2266526c..7b8a47f438 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -10578,6 +10578,16 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        lock table
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>wait_start</structfield> <type>timestamptz</type>
+      </para>
+      <para>
+       Lock acquisition wait start time. <literal>NULL</literal> if
+       lock acquired.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 20e50247ea..f45dbf5dd5 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -1268,6 +1268,7 @@ SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
 			proc->lockGroupLeader : proc;
 		proclock->holdMask = 0;
 		proclock->releaseMask = 0;
+		proclock->wait_start = 0;
 		/* Add proclock to appropriate lists */
 		SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
 		SHMQueueInsertBefore(&(proc->myProcLocks[partition]),
@@ -3627,6 +3628,12 @@ GetLockStatusData(void)
 			instance->leaderPid = proc->pid;
 			instance->fastpath = true;
 
+			/*
+			 * Successfully taking fast path lock means there were no
+			 * conflicting locks.
+			 */
+			instance->wait_start = 0;
+
 			el++;
 		}
 
@@ -3654,6 +3661,7 @@ GetLockStatusData(void)
 			instance->pid = proc->pid;
 			instance->leaderPid = proc->pid;
 			instance->fastpath = true;
+			instance->wait_start = 0;
 
 			el++;
 		}
@@ -3706,6 +3714,7 @@ GetLockStatusData(void)
 		instance->pid = proc->pid;
 		instance->leaderPid = proclock->groupLeader->pid;
 		instance->fastpath = false;
+		instance->wait_start = proclock->wait_start;
 
 		el++;
 	}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index c87ffc6549..a06252f245 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1262,6 +1262,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		}
 		else
 			enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
+
+		proclock->wait_start = get_timeout_start_time(DEADLOCK_TIMEOUT);
 	}
 	else if (log_recovery_conflict_waits)
 	{
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index b1cf5b79a7..b347de4e61 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -63,7 +63,7 @@ typedef struct
 } PG_Lock_Status;
 
 /* Number of columns in pg_locks output */
-#define NUM_LOCK_STATUS_COLUMNS		15
+#define NUM_LOCK_STATUS_COLUMNS		16
 
 /*
  * VXIDGetDatum - Construct a text representation of a VXID
@@ -142,6 +142,8 @@ pg_lock_status(PG_FUNCTION_ARGS)
 						   BOOLOID, -1, 0);
 		TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath",
 						   BOOLOID, -1, 0);
+		TupleDescInitEntry(tupdesc, (AttrNumber) 16, "wait_start",
+						   TIMESTAMPTZOID, -1, 0);
 
 		funcctx->tuple_desc = BlessTupleDesc(tupdesc);
 
@@ -336,6 +338,10 @@ pg_lock_status(PG_FUNCTION_ARGS)
 		values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode));
 		values[13] = BoolGetDatum(granted);
 		values[14] = BoolGetDatum(instance->fastpath);
+		if (instance->wait_start != 0)
+			values[15] = TimestampTzGetDatum(instance->wait_start);
+		else
+			nulls[15] = true;
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		result = HeapTupleGetDatum(tuple);
@@ -406,6 +412,7 @@ pg_lock_status(PG_FUNCTION_ARGS)
 		values[12] = CStringGetTextDatum("SIReadLock");
 		values[13] = BoolGetDatum(true);
 		values[14] = BoolGetDatum(false);
+		nulls[15] = true;
 
 		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
 		result = HeapTupleGetDatum(tuple);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index d27336adcd..5cd93c4587 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5948,9 +5948,9 @@
 { oid => '1371', descr => 'view system lock information',
   proname => 'pg_lock_status', prorows => '1000', proretset => 't',
   provolatile => 'v', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath}',
+  proallargtypes => '{text,oid,oid,int4,int2,text,xid,oid,oid,int2,text,int4,text,bool,bool,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{locktype,database,relation,page,tuple,virtualxid,transactionid,classid,objid,objsubid,virtualtransaction,pid,mode,granted,fastpath,wait_start}',
   prosrc => 'pg_lock_status' },
 { oid => '2561',
   descr => 'get array of PIDs of sessions blocking specified backend PID from acquiring a heavyweight lock',
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 2e6ef174e9..319438a61d 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -22,6 +22,7 @@
 #include "storage/lockdefs.h"
 #include "storage/lwlock.h"
 #include "storage/shmem.h"
+#include "utils/timestamp.h"
 
 /* struct PGPROC is declared in proc.h, but must forward-reference it */
 typedef struct PGPROC PGPROC;
@@ -366,6 +367,7 @@ typedef struct PROCLOCK
 	LOCKMASK	releaseMask;	/* bitmask for lock types to be released */
 	SHM_QUEUE	lockLink;		/* list link in LOCK's list of proclocks */
 	SHM_QUEUE	procLink;		/* list link in PGPROC's list of proclocks */
+	TimestampTz	wait_start;		/* time at which wait for lock acquisition started */
 } PROCLOCK;
 
 #define PROCLOCK_LOCKMETHOD(proclock) \
@@ -445,6 +447,7 @@ typedef struct LockInstanceData
 	LOCKMODE	waitLockMode;	/* lock awaited by this PGPROC, if any */
 	BackendId	backend;		/* backend ID of this PGPROC */
 	LocalTransactionId lxid;	/* local transaction ID of this PGPROC */
+	TimestampTz	wait_start;		/* time at which this PGPROC started waiting for lock */
 	int			pid;			/* pid of this PGPROC */
 	int			leaderPid;		/* pid of group leader; = pid if no group */
 	bool		fastpath;		/* taken via fastpath? */
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index a687e99d1e..8ccc859882 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1394,8 +1394,9 @@ pg_locks| SELECT l.locktype,
     l.pid,
     l.mode,
     l.granted,
-    l.fastpath
-   FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath);
+    l.fastpath,
+    l.wait_start
+   FROM pg_lock_status() l(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid, virtualtransaction, pid, mode, granted, fastpath, wait_start);
 pg_matviews| SELECT n.nspname AS schemaname,
     c.relname AS matviewname,
     pg_get_userbyid(c.relowner) AS matviewowner,

Reply via email to