On 09/15/2015 03:51 PM, Jesper Pedersen wrote:
It
would be nice to get a better sense of how *long* we block on various
locks.  It's hard to tell whether some other lock might be have fewer
blocking events but for a much longer average duration.



I did a run with the attached patch, recording the time from LWLockQueueSelf() until the lock is required.

Doing block_time / block_count basically only shows "main 0" -- its called "unassigned:0"; it also shows up in the max exclusive report. Where it is coming from is another question, since it shouldn't be in use.

Due to the overhead of gettimeofday() I think that _v1 is better to consider.

Best regards,
 Jesper

*** /tmp/K1hW5V_lwlock.c	2015-09-16 09:53:26.682803203 -0400
--- src/backend/storage/lmgr/lwlock.c	2015-09-16 08:59:11.232712749 -0400
***************
*** 92,97 ****
--- 92,98 ----
  #include "utils/memutils.h"
  
  #ifdef LWLOCK_STATS
+ #include <sys/time.h>
  #include "utils/hsearch.h"
  #endif
  
***************
*** 163,176 ****
--- 164,190 ----
  {
  	lwlock_stats_key key;
  	int			sh_acquire_count;
+ 	int			sh_acquire_max;
  	int			ex_acquire_count;
+ 	int			ex_acquire_max;
  	int			block_count;
+ 	double      block_time;
+ 	double      timestamp;
  	int			dequeue_self_count;
  	int			spin_delay_count;
+ 	int			max_waiters;
  }	lwlock_stats;
  
  static HTAB *lwlock_stats_htab;
  static lwlock_stats lwlock_stats_dummy;
+ 
+ static double
+ GetTimeInMillis()
+ {
+ 	struct timeval t;
+ 	gettimeofday(&t, NULL);
+ 	return (t.tv_sec + (t.tv_usec / 1000000.0)) * 1000.0;
+ }
  #endif
  
  #ifdef LOCK_DEBUG
***************
*** 297,308 ****
  
  	while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
  	{
! 		fprintf(stderr,
! 				"PID %d lwlock %s %d: shacq %u exacq %u blk %u spindelay %u dequeue self %u\n",
! 				MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
! 				lwstats->key.instance, lwstats->sh_acquire_count,
! 				lwstats->ex_acquire_count, lwstats->block_count,
! 				lwstats->spin_delay_count, lwstats->dequeue_self_count);
  	}
  
  	LWLockRelease(&MainLWLockArray[0].lock);
--- 311,332 ----
  
  	while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
  	{
! 		if (lwstats->key.tranche == 0 && lwstats->key.instance < NUM_INDIVIDUAL_LWLOCKS)
! 			fprintf(stderr,
! 					"PID %d lwlock %s: shacq %u shmax %u exacq %u exmax %u blk %u blktime %u spindelay %u dequeue self %u maxw %u\n",
! 					MyProcPid, MainLWLockNames[lwstats->key.instance],
! 					lwstats->sh_acquire_count, lwstats->sh_acquire_max,
! 					lwstats->ex_acquire_count, lwstats->ex_acquire_max,
! 					lwstats->block_count, (unsigned int)lwstats->block_time, lwstats->spin_delay_count, lwstats->dequeue_self_count,
! 					lwstats->max_waiters);
! 		else
! 			fprintf(stderr,
! 					"PID %d lwlock %s %d: shacq %u shmax %u exacq %u exmax %u blk %u blktime %u spindelay %u dequeue self %u maxw %u\n",
! 					MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
! 					lwstats->key.instance, lwstats->sh_acquire_count, lwstats->sh_acquire_max,
! 					lwstats->ex_acquire_count, lwstats->ex_acquire_max,
! 					lwstats->block_count, (unsigned int)lwstats->block_time, lwstats->spin_delay_count, lwstats->dequeue_self_count,
! 					lwstats->max_waiters);
  	}
  
  	LWLockRelease(&MainLWLockArray[0].lock);
***************
*** 330,339 ****
--- 354,368 ----
  	if (!found)
  	{
  		lwstats->sh_acquire_count = 0;
+ 		lwstats->sh_acquire_max = 0;
  		lwstats->ex_acquire_count = 0;
+ 		lwstats->ex_acquire_max = 0;
  		lwstats->block_count = 0;
+ 		lwstats->block_time = 0.0;
+ 		lwstats->timestamp = 0.0;
  		lwstats->dequeue_self_count = 0;
  		lwstats->spin_delay_count = 0;
+ 		lwstats->max_waiters = 0;
  	}
  	return lwstats;
  }
***************
*** 774,779 ****
--- 803,811 ----
  LWLockQueueSelf(LWLock *lock, LWLockMode mode)
  {
  #ifdef LWLOCK_STATS
+ 	bool include;
+ 	int counter, size;
+ 	dlist_iter iter;
  	lwlock_stats *lwstats;
  
  	lwstats = get_lwlock_stats_entry(lock);
***************
*** 791,797 ****
--- 823,870 ----
  		elog(PANIC, "queueing for lock while waiting on another one");
  
  #ifdef LWLOCK_STATS
+ 	lwstats->timestamp = GetTimeInMillis();
  	lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
+ 
+ 	/*
+ 	 * We scan the list of waiters from the back in order to find
+ 	 * out how many of the same lock type are waiting for a lock.
+ 	 * Similar types have the potential to be groupped together.
+ 	 *
+ 	 * We also count the number of waiters, including ourself.
+ 	 */
+ 	include = true;
+ 	size = 1;
+ 	counter = 1;
+ 
+ 	dlist_reverse_foreach(iter, &lock->waiters)
+ 	{
+ 		if (include)
+ 		{
+ 			PGPROC	   *waiter = dlist_container(PGPROC, lwWaitLink, iter.cur);
+ 
+ 			if (waiter->lwWaitMode == mode)
+ 				counter += 1;
+ 			else
+ 				include = false;
+ 		}
+ 		
+ 		size += 1;
+ 	}
+ 
+ 	if (mode == LW_EXCLUSIVE || mode == LW_WAIT_UNTIL_FREE)
+ 	{
+ 		if (counter > lwstats->ex_acquire_max)
+ 			lwstats->ex_acquire_max = counter;
+ 	}
+ 	else if (mode == LW_SHARED)
+ 	{
+ 		if (counter > lwstats->sh_acquire_max)
+ 			lwstats->sh_acquire_max = counter;
+ 	}
+ 
+ 	if (size > lwstats->max_waiters)
+ 		lwstats->max_waiters = size;
  #else
  	SpinLockAcquire(&lock->mutex);
  #endif
***************
*** 943,951 ****
--- 1016,1032 ----
  #ifdef LWLOCK_STATS
  	/* Count lock acquisition attempts */
  	if (mode == LW_EXCLUSIVE)
+ 	{
  		lwstats->ex_acquire_count++;
+ 		if (lwstats->ex_acquire_max == 0)
+ 			lwstats->ex_acquire_max = 1;
+ 	}
  	else
+ 	{
  		lwstats->sh_acquire_count++;
+ 		if (lwstats->sh_acquire_max == 0)
+ 			lwstats->sh_acquire_max = 1;
+ 	}
  #endif   /* LWLOCK_STATS */
  
  	/*
***************
*** 1021,1026 ****
--- 1102,1110 ----
  			LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");
  
  			LWLockDequeueSelf(lock);
+ #ifdef LWLOCK_STATS
+ 			lwstats->block_time += (GetTimeInMillis() - lwstats->timestamp);
+ #endif
  			break;
  		}
  
***************
*** 1068,1073 ****
--- 1152,1160 ----
  
  		LOG_LWDEBUG("LWLockAcquire", lock, "awakened");
  
+ #ifdef LWLOCK_STATS
+ 		lwstats->block_time += (GetTimeInMillis() - lwstats->timestamp);
+ #endif
  		/* Now loop back and try to acquire lock again. */
  		result = false;
  	}
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to