Hello, I would like to get some feedback on that task.
> pg_statio_*_tables.idx_blks_hit are highly misleading in practice > because they fail to take account of the difference between internal > pages and leaf pages in B-Tree indexes. I see it is still the case, so the issue is relevant, isn't it ? > The main challenge would be > passing information about what page we're dealing with (internal/leaf) > to the place actually calling pgstat_count_buffer_(read|hit). That > happens in ReadBufferExtended, which just has no idea what page it's > dealing with. Not sure how to do that cleanly ... I do not immediately see the way to pass the information in a completely clean manner. Either (1) ReadBufferExtended needs to know the type of an index page (leaf/internal) or (2) caller of ReadBufferExtended that can check the page type needs to learn if there was a hit and call pgstat_count_buffer_(read|hit) accordingly. In either case necessary code changes seem quite invasive to me. I have attached a code snippet to illustrate the second idea. Regards, Sergey
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 20adb602a4..d8c22be949 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -31,6 +31,7 @@ #include "miscadmin.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" +#include "pgstat.h" #include "storage/predicate.h" #include "storage/procarray.h" #include "utils/memdebug.h" @@ -870,14 +871,23 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, FullTransactionId safexid) Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access) { - Buffer buf; + Buffer buf; + Page page; + BTPageOpaque opaque; if (blkno != P_NEW) { + bool hit; /* Read an existing block of the relation */ - buf = ReadBuffer(rel, blkno); + buf = ReadIndexBuffer(rel, blkno, &hit); _bt_lockbuf(rel, buf, access); _bt_checkpage(rel, buf); + + page = BufferGetPage(buf); + opaque = BTPageGetOpaque(page); + if (hit && P_ISLEAF(opaque)) + pgstat_count_buffer_hit(rel); + } else { diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index ae13011d27..ab9522fd50 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -704,6 +704,16 @@ ReadBuffer(Relation reln, BlockNumber blockNum) return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL); } +/* + * ReadIndexBuffer -- a shorthand for ReadIndexBufferExtended, for reading from main + * fork with RBM_NORMAL mode and default strategy. + */ +Buffer +ReadIndexBuffer(Relation reln, BlockNumber blockNum, bool *hit) +{ + return ReadIndexBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL, hit); +} + /* * ReadBufferExtended -- returns a buffer containing the requested * block of the requested relation. If the blknum @@ -774,6 +784,34 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, return buf; } +/* + * Proof-of-concept. + * + * Same as ReadBufferExtended but returns the value of "hit" to improve + * statistics collection for indexes. + */ +Buffer +ReadIndexBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) +{ + Buffer buf; + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(reln)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence, + forkNum, blockNum, mode, strategy, hit); + + return buf; +} + /* * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 58391406f6..d4483c1666 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -179,9 +179,13 @@ extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum, extern bool ReadRecentBuffer(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, Buffer recent_buffer); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); +extern Buffer ReadIndexBuffer(Relation reln, BlockNumber blockNum, bool *hit); extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); +extern Buffer ReadIndexBufferExtended(Relation reln, ForkNumber forkNum, + BlockNumber blockNum, ReadBufferMode mode, + BufferAccessStrategy strategy, bool *hit); extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy,