On Wed, Mar 8, 2023 at 2:23 PM Andres Freund <and...@anarazel.de> wrote:
>
> On 2023-03-08 13:44:32 -0500, Melanie Plageman wrote:
> > However, I am concerned that, while unlikely, this could be flakey.
> > Something could happen to force all of those blocks out of shared
> > buffers (even though they were just read in) before we hit them.
>
> You could make the test query a simple nested loop self-join, that'll prevent
> the page being evicted, because it'll still be pinned on the outer side, while
> generating hits on the inner side.

Good idea. v3 attached.
From e15bd63d662780898ef6fd584608acb13a12dbe1 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Sat, 25 Feb 2023 14:37:25 -0500
Subject: [PATCH v3 1/2] Reorder pgstatfuncs-local enum

---
 src/backend/utils/adt/pgstatfuncs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index b61a12382b..1112bc2904 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1276,16 +1276,16 @@ pgstat_get_io_op_index(IOOp io_op)
 	{
 		case IOOP_EVICT:
 			return IO_COL_EVICTIONS;
+		case IOOP_EXTEND:
+			return IO_COL_EXTENDS;
+		case IOOP_FSYNC:
+			return IO_COL_FSYNCS;
 		case IOOP_READ:
 			return IO_COL_READS;
 		case IOOP_REUSE:
 			return IO_COL_REUSES;
 		case IOOP_WRITE:
 			return IO_COL_WRITES;
-		case IOOP_EXTEND:
-			return IO_COL_EXTENDS;
-		case IOOP_FSYNC:
-			return IO_COL_FSYNCS;
 	}
 
 	elog(ERROR, "unrecognized IOOp value: %d", io_op);
-- 
2.37.2

From b648727abe315375fbf13d314fd3b9398a75b26f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Sat, 25 Feb 2023 14:36:06 -0500
Subject: [PATCH v3 2/2] Track shared buffer hits in pg_stat_io

Count new IOOP_HITs and add "hits" column to pg_stat_io.

Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml           | 11 ++++++++
 src/backend/catalog/system_views.sql   |  1 +
 src/backend/storage/buffer/bufmgr.c    | 38 ++++++++++----------------
 src/backend/storage/buffer/localbuf.c  | 11 ++------
 src/backend/utils/activity/pgstat_io.c |  2 +-
 src/backend/utils/adt/pgstatfuncs.c    |  3 ++
 src/include/catalog/pg_proc.dat        |  6 ++--
 src/include/pgstat.h                   |  1 +
 src/include/storage/buf_internals.h    |  2 +-
 src/test/regress/expected/rules.out    |  3 +-
 src/test/regress/expected/stats.out    | 34 +++++++++++++++++++++--
 src/test/regress/sql/stats.sql         | 21 ++++++++++++--
 12 files changed, 90 insertions(+), 43 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 6249bb50d0..8b34ca60bc 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>hits</structfield> <type>bigint</type>
+       </para>
+       <para>
+        The number of times a desired block was found in a shared buffer.
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 34ca0e739f..87bbbdfcb3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1126,6 +1126,7 @@ SELECT
        b.writes,
        b.extends,
        b.op_bytes,
+       b.hits,
        b.evictions,
        b.reuses,
        b.fsyncs,
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0a05577b68..05fd3c9a2a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   ForkNumber forkNum,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
-							   bool *foundPtr, IOContext *io_context);
+							   bool *foundPtr, IOContext io_context);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
 						IOObject io_object, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (isLocalBuf)
 	{
 		/*
-		 * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
-		 * do not use a BufferAccessStrategy for I/O of temporary tables.
+		 * We do not use a BufferAccessStrategy for I/O of temporary tables.
 		 * However, in some cases, the "strategy" may not be NULL, so we can't
 		 * rely on IOContextForStrategy() to set the right IOContext for us.
 		 * This may happen in cases like CREATE TEMPORARY TABLE AS...
 		 */
-		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
+		io_context = IOCONTEXT_NORMAL;
+		io_object = IOOBJECT_TEMP_RELATION;
+		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
 		if (found)
 			pgBufferUsage.local_blks_hit++;
 		else if (isExtend)
@@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
 		 * not currently in memory.
 		 */
+		io_context = IOContextForStrategy(strategy);
+		io_object = IOOBJECT_RELATION;
 		bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
-							 strategy, &found, &io_context);
+							 strategy, &found, io_context);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
 		else if (isExtend)
@@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			/* Just need to update stats before we exit */
 			*hit = true;
 			VacuumPageHit++;
+			pgstat_count_io_op(io_object, io_context, IOOP_HIT);
 
 			if (VacuumCostActive)
 				VacuumCostBalance += VacuumCostPageHit;
@@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID));	/* spinlock not needed */
 
-	if (isLocalBuf)
-	{
-		bufBlock = LocalBufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_TEMP_RELATION;
-	}
-	else
-	{
-		bufBlock = BufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_RELATION;
-	}
+	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
 
 	if (isExtend)
 	{
@@ -1139,7 +1134,7 @@ static BufferDesc *
 BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
-			bool *foundPtr, IOContext *io_context)
+			bool *foundPtr, IOContext io_context)
 {
 	bool		from_ring;
 	BufferTag	newTag;			/* identity of requested block */
@@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			{
 				/*
 				 * If we get here, previous attempts to read the buffer must
-				 * have failed ... but we shall bravely try again. Set
-				 * io_context since we will in fact need to count an IO
-				 * Operation.
+				 * have failed ... but we shall bravely try again.
 				 */
-				*io_context = IOContextForStrategy(strategy);
 				*foundPtr = false;
 			}
 		}
@@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	LWLockRelease(newPartitionLock);
 
-	*io_context = IOContextForStrategy(strategy);
-
 	/* Loop here in case we have to try another victim buffer */
 	for (;;)
 	{
@@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rlocator.locator.dbOid,
 														  smgr->smgr_rlocator.locator.relNumber);
 
-				FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
+				FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * we may have been forced to release the buffer due to concurrent
 		 * pinners or erroring out.
 		 */
-		pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+		pgstat_count_io_op(IOOBJECT_RELATION, io_context,
 						   from_ring ? IOOP_REUSE : IOOP_EVICT);
 	}
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 5325ddb663..880c9909b9 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
  */
 BufferDesc *
 LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
-				 bool *foundPtr, IOContext *io_context)
+				 bool *foundPtr)
 {
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
@@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	hresult = (LocalBufferLookupEnt *)
 		hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);
 
-	/*
-	 * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
-	 * io_context here (instead of after a buffer hit would have returned) for
-	 * convenience since we don't have to worry about the overhead of calling
-	 * IOContextForStrategy().
-	 */
-	*io_context = IOCONTEXT_NORMAL;
-
 	if (hresult)
 	{
 		b = hresult->id;
@@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
+		/* Temporary table I/O does not use Buffer Access Strategies */
 		pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
 		pgBufferUsage.local_blks_written++;
 	}
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index af5d554610..ae8bb34f78 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 	 * Some BackendTypes will not do certain IOOps.
 	 */
 	if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
-		(io_op == IOOP_READ || io_op == IOOP_EVICT))
+		(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
 		return false;
 
 	if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 1112bc2904..e2e79eca99 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1258,6 +1258,7 @@ typedef enum io_stat_col
 	IO_COL_WRITES,
 	IO_COL_EXTENDS,
 	IO_COL_CONVERSION,
+	IO_COL_HITS,
 	IO_COL_EVICTIONS,
 	IO_COL_REUSES,
 	IO_COL_FSYNCS,
@@ -1280,6 +1281,8 @@ pgstat_get_io_op_index(IOOp io_op)
 			return IO_COL_EXTENDS;
 		case IOOP_FSYNC:
 			return IO_COL_FSYNCS;
+		case IOOP_HIT:
+			return IO_COL_HITS;
 		case IOOP_READ:
 			return IO_COL_READS;
 		case IOOP_REUSE:
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 505595620e..615139c2f9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5721,9 +5721,9 @@
   proname => 'pg_stat_get_io', provolatile => 'v',
   prorows => '30', proretset => 't',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
+  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
   prosrc => 'pg_stat_get_io' },
 
 { oid => '1136', descr => 'statistics: information about WAL activity',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ed..a7f77c17b3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -304,6 +304,7 @@ typedef enum IOOp
 	IOOP_EVICT,
 	IOOP_EXTEND,
 	IOOP_FSYNC,
+	IOOP_HIT,
 	IOOP_READ,
 	IOOP_REUSE,
 	IOOP_WRITE,
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0b44814740..2afb9bb309 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
 												ForkNumber forkNum,
 												BlockNumber blockNum);
 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
-									BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
+									BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
 									 ForkNumber forkNum,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e953d1f515..c994da9f97 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1883,11 +1883,12 @@ pg_stat_io| SELECT backend_type,
     writes,
     extends,
     op_bytes,
+    hits,
     evictions,
     reuses,
     fsyncs,
     stats_reset
-   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
+   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
     d.datname,
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 186c296299..7d62ef5f17 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
 -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@@ -1208,6 +1209,35 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
  t
 (1 row)
 
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+-- Select from the table again to count hits.
+-- Ensure we generate hits by forcing a nested loop join with no materialize
+-- node. The outer side buffer will stay pinned, preventing its eviction, while
+-- we loop through the inner side and generate hits.
+SET enable_nestloop TO on; SET enable_mergejoin TO off; SET enable_hashjoin TO off;
+SET enable_material TO off;
+SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
+ count 
+-------
+   100
+(1 row)
+
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+ ?column? 
+----------
+ t
+(1 row)
+
+RESET enable_mergejoin; RESET enable_hashjoin; RESET enable_material;
 DROP TABLE test_io_shared;
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
 -- - eviction of local buffers in order to reuse them
@@ -1342,7 +1372,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
  t
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
  pg_stat_reset_shared 
@@ -1350,7 +1380,7 @@ SELECT pg_stat_reset_shared('io');
  
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
  ?column? 
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index d7f873cfc9..495e48990d 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
@@ -588,6 +589,22 @@ SELECT pg_stat_force_next_flush();
 SELECT sum(reads) AS io_sum_shared_after_reads
   FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation'  \gset
 SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
+
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+-- Select from the table again to count hits.
+-- Ensure we generate hits by forcing a nested loop join with no materialize
+-- node. The outer side buffer will stay pinned, preventing its eviction, while
+-- we loop through the inner side and generate hits.
+SET enable_nestloop TO on; SET enable_mergejoin TO off; SET enable_hashjoin TO off;
+SET enable_material TO off;
+SELECT COUNT(*) FROM test_io_shared t1 INNER JOIN test_io_shared t2 USING (a);
+SELECT pg_stat_force_next_flush();
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+RESET enable_mergejoin; RESET enable_hashjoin; RESET enable_material;
+
 DROP TABLE test_io_shared;
 
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
@@ -675,10 +692,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
 
 -- Test IO stats reset
 SELECT pg_stat_have_stats('io', 0, 0);
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
 
-- 
2.37.2

Reply via email to