diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index 53cc4b6..4270e37 100644
*** a/contrib/pgbench/pgbench.c
--- b/contrib/pgbench/pgbench.c
*************** int			fillfactor = 100;
*** 133,138 ****
--- 133,139 ----
  
  bool		use_log;			/* log transaction latencies to a file */
  bool		is_connect;			/* establish connection for each transaction */
+ bool		is_latencies;		/* report per-command latencies */
  int			main_pid;			/* main process id used in log filename */
  
  char	   *pghost = "";
*************** typedef struct
*** 171,177 ****
  	int64		until;			/* napping until (usec) */
  	Variable   *variables;		/* array of variable definitions */
  	int			nvariables;
! 	instr_time	txn_begin;		/* used for measuring latencies */
  	int			use_file;		/* index in sql_files for this client */
  	bool		prepared[MAX_FILES];
  } CState;
--- 172,179 ----
  	int64		until;			/* napping until (usec) */
  	Variable   *variables;		/* array of variable definitions */
  	int			nvariables;
! 	instr_time	txn_begin;		/* used for measuring transaction latencies */
! 	instr_time	stmt_begin;		/* used for measuring statement latencies */
  	int			use_file;		/* index in sql_files for this client */
  	bool		prepared[MAX_FILES];
  } CState;
*************** typedef struct
*** 199,204 ****
--- 201,207 ----
  /*
   * queries read from files
   */
+ #define COMMENT_COMMAND	0
  #define SQL_COMMAND		1
  #define META_COMMAND	2
  #define MAX_ARGS		10
*************** static const char *QUERYMODE[] = {"simpl
*** 216,224 ****
--- 219,230 ----
  
  typedef struct
  {
+ 	char	   *line;			/* line containing the command */
  	int			type;			/* command type (SQL_COMMAND or META_COMMAND) */
  	int			argc;			/* number of commands */
  	char	   *argv[MAX_ARGS]; /* command list */
+ 	instr_time *exec_elapsed;	/* time spent executing this command (per thread) */
+ 	int		   *exec_count;		/* number of executions (per thread) */
  } Command;
  
  static Command **sql_files[MAX_FILES];	/* SQL script files */
*************** usage(const char *progname)
*** 287,292 ****
--- 293,299 ----
  		   "               define variable for use by custom script\n"
  		   "  -f FILENAME  read transaction script from FILENAME\n"
  		   "  -j NUM       number of threads (default: 1)\n"
+ 		   "  -r           report average latency per command\n"
  		   "  -l           write transaction times to log file\n"
  		   "  -M {simple|extended|prepared}\n"
  		   "               protocol for submitting queries to server (default: simple)\n"
*************** clientDone(CState *st, bool ok)
*** 740,746 ****
  
  /* return false iff client should be disconnected */
  static bool
! doCustom(CState *st, instr_time *conn_time, FILE *logfile)
  {
  	PGresult   *res;
  	Command   **commands;
--- 747,753 ----
  
  /* return false iff client should be disconnected */
  static bool
! doCustom(TState* thread, CState *st, instr_time *conn_time, FILE *logfile)
  {
  	PGresult   *res;
  	Command   **commands;
*************** top:
*** 800,805 ****
--- 807,824 ----
  #endif
  		}
  
+ 		/* Accumulate per-command execution times if per-command latencies
+ 		   are requested */
+ 		if (is_latencies && (commands[st->state]->type != COMMENT_COMMAND))
+ 		{
+ 			instr_time now;
+ 			
+ 			INSTR_TIME_SET_CURRENT(now);
+ 			INSTR_TIME_ACCUM_DIFF(commands[st->state]->exec_elapsed[thread->tid],
+ 			                      now, st->stmt_begin);
+ 			++(commands[st->state]->exec_count[thread->tid]);
+ 		}
+ 
  		if (commands[st->state]->type == SQL_COMMAND)
  		{
  			res = PQgetResult(st->con);
*************** top:
*** 840,845 ****
--- 859,871 ----
  			commands = sql_files[st->use_file];
  		}
  	}
+ 	
+ 	/* Skip comments */
+ 	if (commands[st->state]->type == COMMENT_COMMAND)
+ 	{
+ 		st->listen = 1;
+ 		goto top;
+ 	}
  
  	if (st->con == NULL)
  	{
*************** top:
*** 856,864 ****
--- 882,895 ----
  		INSTR_TIME_ACCUM_DIFF(*conn_time, end, start);
  	}
  
+ 	/* Record transaction start time if logging is enabled */
  	if (logfile && st->state == 0)
  		INSTR_TIME_SET_CURRENT(st->txn_begin);
  
+ 	/* Record statement start time if per-command latencies are requested */
+ 	if (is_latencies)
+ 		INSTR_TIME_SET_CURRENT(st->stmt_begin);
+ 
  	if (commands[st->state]->type == SQL_COMMAND)
  	{
  		const Command *command = commands[st->state];
*************** process_commands(char *buf)
*** 1361,1385 ****
  	char	   *p,
  			   *tok;
  
  	if ((p = strchr(buf, '\n')) != NULL)
  		*p = '\0';
  
! 	p = buf;
! 	while (isspace((unsigned char) *p))
! 		p++;
! 
! 	if (*p == '\0' || strncmp(p, "--", 2) == 0)
  	{
  		return NULL;
  	}
  
! 	my_commands = (Command *) malloc(sizeof(Command));
! 	if (my_commands == NULL)
  	{
! 		return NULL;
  	}
  
! 	my_commands->argc = 0;
  
  	if (*p == '\\')
  	{
--- 1392,1428 ----
  	char	   *p,
  			   *tok;
  
+ 	/* Make the string buf end at the next newline */
  	if ((p = strchr(buf, '\n')) != NULL)
  		*p = '\0';
  
! 	/* Allocate and initialize Command structe, initially
! 	 * assuming the line is a comment
! 	 */
! 	my_commands = (Command *) malloc(sizeof(Command));
! 	if (my_commands == NULL)
  	{
  		return NULL;
  	}
+ 	my_commands->type = COMMENT_COMMAND;
+ 	my_commands->line = strdup(buf);
+ 	my_commands->argc = 0;
+ 	my_commands->exec_elapsed = NULL;
+ 	my_commands->exec_count = NULL;
  
! 	p = buf;
! 	
! 	/* Skip leading whitespace */
! 	while (isspace((unsigned char) *p))
  	{
! 		p++;
  	}
  
! 	/* If the line is empty or actually a comment, we're done */
! 	if (*p == '\0' || strncmp(p, "--", 2) == 0)
! 	{
! 		return my_commands;
! 	}
  
  	if (*p == '\\')
  	{
*************** process_file(char *filename)
*** 1548,1570 ****
  	while (fgets(buf, sizeof(buf), fd) != NULL)
  	{
  		Command    *commands;
- 		int			i;
  
! 		i = 0;
! 		while (isspace((unsigned char) buf[i]))
! 			i++;
! 
! 		if (buf[i] != '\0' && strncmp(&buf[i], "--", 2) != 0)
  		{
! 			commands = process_commands(&buf[i]);
! 			if (commands == NULL)
! 			{
! 				fclose(fd);
! 				return false;
! 			}
  		}
- 		else
- 			continue;
  
  		my_commands[lineno] = commands;
  		lineno++;
--- 1591,1603 ----
  	while (fgets(buf, sizeof(buf), fd) != NULL)
  	{
  		Command    *commands;
  
! 		commands = process_commands(buf);
! 		if (commands == NULL)
  		{
! 			fclose(fd);
! 			return false;
  		}
  
  		my_commands[lineno] = commands;
  		lineno++;
*************** printResults(int ttype, int normal_xacts
*** 1660,1665 ****
--- 1693,1699 ----
  				tps_include,
  				tps_exclude;
  	char	   *s;
+ 	int			i;
  
  	time_include = INSTR_TIME_GET_DOUBLE(total_time);
  	tps_include = normal_xacts / time_include;
*************** printResults(int ttype, int normal_xacts
*** 1694,1699 ****
--- 1728,1771 ----
  	}
  	printf("tps = %f (including connections establishing)\n", tps_include);
  	printf("tps = %f (excluding connections establishing)\n", tps_exclude);
+ 	
+ 	/* Report per-command latencies */
+ 	if (is_latencies)
+ 	{
+ 		for(i = 0; i < num_files; ++i) {
+ 			Command** commands;
+ 
+ 			if (num_files > 1)
+ 				printf("command latencies in milliseconds, file %d\n", i+1);
+ 			else
+ 				printf("command latencies in milliseconds\n");				
+ 
+ 			for(commands = sql_files[i]; *commands != NULL; ++commands)
+ 			{
+ 				Command* command = *commands;
+ 				instr_time total_exec_elapsed;
+ 				int total_exec_count;
+ 				int t;
+ 
+ 				/* Accumulate per-thread data if available */
+ 				INSTR_TIME_SET_ZERO(total_exec_elapsed);
+ 				total_exec_count = 0;
+ 				if ((command->exec_elapsed != NULL) && (command->exec_count != NULL))
+ 				{
+ 					for(t = 0; t < nthreads; ++t)
+ 					{
+ 						INSTR_TIME_ADD(total_exec_elapsed, command->exec_elapsed[t]);
+ 						total_exec_count += command->exec_count[t];
+ 					}
+ 				}
+ 
+ 				if (total_exec_count > 0)
+ 					printf("\t%f\t%s\n", INSTR_TIME_GET_MILLISEC(total_exec_elapsed) / (double)total_exec_count, command->line);
+ 				else
+ 					printf("\t%f\t%s\n", 0.0, command->line);
+ 			}
+ 		}
+ 	}
  }
  
  
*************** main(int argc, char **argv)
*** 1770,1776 ****
  
  	memset(state, 0, sizeof(*state));
  
! 	while ((c = getopt(argc, argv, "ih:nvp:dSNc:Cs:t:T:U:lf:D:F:M:j:")) != -1)
  	{
  		switch (c)
  		{
--- 1842,1848 ----
  
  	memset(state, 0, sizeof(*state));
  
! 	while ((c = getopt(argc, argv, "ih:nvp:dSNc:Cs:t:T:U:rlf:D:F:M:j:")) != -1)
  	{
  		switch (c)
  		{
*************** main(int argc, char **argv)
*** 1834,1839 ****
--- 1906,1914 ----
  			case 'C':
  				is_connect = true;
  				break;
+ 			case 'r':
+ 				is_latencies = true;
+ 				break;
  			case 's':
  				scale_given = true;
  				scale = atoi(optarg);
*************** main(int argc, char **argv)
*** 2090,2095 ****
--- 2165,2193 ----
  		default:
  			break;
  	}
+ 	
+ 	/* Reserve memory for the threads to store per-command latencies if requested */
+ 	if (is_latencies)
+ 	{
+ 		for(i = 0; i < num_files; ++i)
+ 		{
+ 			Command** commands;
+ 			for(commands = sql_files[i]; *commands != NULL; ++commands) 
+ 			{
+ 				Command* command = *commands;
+ 				int t;
+ 
+ 				command->exec_elapsed = (instr_time*) malloc(sizeof(instr_time) * nthreads);
+ 				command->exec_count = (int*) malloc(sizeof(int) * nthreads);
+ 
+ 				for(t = 0; t < nthreads; ++t)
+ 				{
+ 					INSTR_TIME_SET_ZERO(command->exec_elapsed[t]);
+ 					command->exec_count[t] = 0;
+ 				}
+ 			}
+ 		}
+ 	}
  
  	/* get start up time */
  	INSTR_TIME_SET_CURRENT(start_time);
*************** threadRun(void *arg)
*** 2211,2217 ****
  		int			prev_ecnt = st->ecnt;
  
  		st->use_file = getrand(0, num_files - 1);
! 		if (!doCustom(st, &result->conn_time, logfile))
  			remains--;			/* I've aborted */
  
  		if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
--- 2309,2315 ----
  		int			prev_ecnt = st->ecnt;
  
  		st->use_file = getrand(0, num_files - 1);
! 		if (!doCustom(thread, st, &result->conn_time, logfile))
  			remains--;			/* I've aborted */
  
  		if (st->ecnt > prev_ecnt && commands[st->state]->type == META_COMMAND)
*************** threadRun(void *arg)
*** 2260,2266 ****
  			{
  				continue;
  			}
! 			else if (commands[st->state]->type == META_COMMAND)
  			{
  				min_usec = 0;	/* the connection is ready to run */
  				break;
--- 2358,2364 ----
  			{
  				continue;
  			}
! 			else if ((commands[st->state]->type == META_COMMAND) || (commands[st->state]->type == COMMENT_COMMAND))
  			{
  				min_usec = 0;	/* the connection is ready to run */
  				break;
*************** threadRun(void *arg)
*** 2313,2319 ****
  			if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
  							|| commands[st->state]->type == META_COMMAND))
  			{
! 				if (!doCustom(st, &result->conn_time, logfile))
  					remains--;	/* I've aborted */
  			}
  
--- 2411,2417 ----
  			if (st->con && (FD_ISSET(PQsocket(st->con), &input_mask)
  							|| commands[st->state]->type == META_COMMAND))
  			{
! 				if (!doCustom(thread, st, &result->conn_time, logfile))
  					remains--;	/* I've aborted */
  			}
  
diff --git a/doc/src/sgml/pgbench.sgml b/doc/src/sgml/pgbench.sgml
index cdad212..ba9b2ab 100644
*** a/doc/src/sgml/pgbench.sgml
--- b/doc/src/sgml/pgbench.sgml
*************** pgbench <optional> <replaceable>options<
*** 237,242 ****
--- 237,252 ----
       </varlistentry>
  
       <varlistentry>
+       <term><option>-r</option></term>
+       <listitem>
+        <para>
+         Report the average latency of each command after the benchmark finishes.
+         See below for details.
+        </para>
+       </listitem>
+      </varlistentry>
+ 
+      <varlistentry>
        <term><option>-l</option></term>
        <listitem>
         <para>
*************** END;
*** 609,614 ****
--- 619,687 ----
   </sect2>
  
   <sect2>
+   <title>Per-statement latencies</title>
+   <para>
+    With the <literal>-r</> option, <application>pgbench</> computes
+    an average of each statement's latency and reports those values
+    after the benchmark has finished.
+   </para>
+ 
+   <para>
+    For the default script, the output will look similar to
+ <screen>
+ starting vacuum...end.
+ transaction type: TPC-B (sort of)
+ scaling factor: 1
+ query mode: simple
+ number of clients: 10
+ number of threads: 1
+ number of transactions per client: 1000
+ number of transactions actually processed: 10000/10000
+ tps = 618.764555 (including connections establishing)
+ tps = 622.977698 (excluding connections establishing)
+ command latencies in milliseconds
+ 	0.004386	\set nbranches 1 * :scale
+ 	0.001343	\set ntellers 10 * :scale
+ 	0.001212	\set naccounts 100000 * :scale
+ 	0.001310	\setrandom aid 1 :naccounts
+ 	0.001073	\setrandom bid 1 :nbranches
+ 	0.001005	\setrandom tid 1 :ntellers
+ 	0.001078	\setrandom delta -5000 5000
+ 	0.326152	BEGIN;
+ 	0.603376	UPDATE pgbench_accounts SET abalance = abalance + :delta WHERE aid = :aid;
+ 	0.454643	SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
+ 	5.528491	UPDATE pgbench_tellers SET tbalance = tbalance + :delta WHERE tid = :tid;
+ 	7.335435	UPDATE pgbench_branches SET bbalance = bbalance + :delta WHERE bid = :bid;
+ 	0.371851	INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
+ 	1.212976	END;
+ </screen>
+   </para>
+ 
+   <para>
+    If multiple script files are specified, the averages are reported
+    separately for each individual script file file, as in the following example
+ <screen>
+ starting vacuum...end.
+ transaction type: Custom query
+ scaling factor: 1
+ query mode: simple
+ number of clients: 10
+ number of threads: 1
+ number of transactions per client: 1000
+ number of transactions actually processed: 10000/10000
+ tps = 162.902251 (including connections establishing)
+ tps = 163.166833 (excluding connections establishing)
+ command latencies in milliseconds, file 1
+ 	2.534675	SELECT * FROM pgbench_branches LIMIT 1;
+ 	1.168928	SELECT COUNT(*) FROM pgbench_branches;
+ command latencies in milliseconds, file 2
+ 	2.394322	SELECT * FROM pgbench_accounts LIMIT 1;
+ 	113.624613	SELECT COUNT(*) FROM pgbench_accounts;
+ </screen>
+   </para>
+  </sect2>
+ 
+  <sect2>
    <title>Per-transaction logging</title>
  
    <para>
