Hi All,

I have introduced a new function 'pgstathashindex()' inside pgstatuple
extension to view the statistics related to hash index table. I could
have used 'pgstattuple()' function to view hash index stats instead of
adding this new function but there are certain limitations when using
pgstattuple() for hash indexes. Firstly, it doesn't work if a hash
index contains zero or new pages which is very common in case of hash
indexes. Secondly, it doesn't provide information about different
types of pages in hash index and its count. Considering these points,
I have thought of introducing this function. Attached is the patch for
the same. Please have a look and let me your feedback. I would also
like to mention that this idea basically came from my colleague Kuntal
Ghosh and i  implemented it. I have also created a commit-fest entry
for this submission. Thanks.

With Regards,
Ashutosh Sharma
EnterpriseDB:http://www.enterprisedb.com
From a82d350b48b9daee017d2f31dee136809333ea82 Mon Sep 17 00:00:00 2001
From: ashu <ashu@localhost.localdomain>
Date: Wed, 21 Dec 2016 18:39:47 +0530
Subject: [PATCH] Add pgstathashindex() to pgstattuple extension v1

This allows us to see the hash index table statistics.
We could have directly used pgstattuple() to see the
hash table statistics but it doesn't include some of
the stats related to hash index like number of bucket
pages, overflow pages, zero pages etc. Moreover, it
can't be used if hash index table contains a zero page.

Patch by Ashutosh. Needs review.
---
 contrib/pgstattuple/Makefile                  |   8 +-
 contrib/pgstattuple/pgstatindex.c             | 225 ++++++++++++++++++++++++++
 contrib/pgstattuple/pgstattuple--1.4.sql      |  15 ++
 contrib/pgstattuple/pgstattuple--1.5--1.6.sql |  22 +++
 contrib/pgstattuple/pgstattuple.control       |   2 +-
 doc/src/sgml/pgstattuple.sgml                 | 108 +++++++++++++
 6 files changed, 375 insertions(+), 5 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstattuple--1.5--1.6.sql

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 294077d..a1601ec 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -4,10 +4,10 @@ MODULE_big	= pgstattuple
 OBJS		= pgstattuple.o pgstatindex.o pgstatapprox.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.4.sql pgstattuple--1.4--1.5.sql \
-	pgstattuple--1.3--1.4.sql pgstattuple--1.2--1.3.sql \
-	pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql \
-	pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.5--1.6.sql pgstattuple--1.4--1.5.sql \
+	pgstattuple--1.4.sql pgstattuple--1.3--1.4.sql \
+	pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql	\
+	pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index d9a722a..a3c8f23 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -29,6 +29,7 @@
 
 #include "access/gin_private.h"
 #include "access/heapam.h"
+#include "access/hash.h"
 #include "access/htup_details.h"
 #include "access/nbtree.h"
 #include "catalog/namespace.h"
@@ -36,6 +37,7 @@
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
+#include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/rel.h"
 
@@ -53,6 +55,7 @@ PG_FUNCTION_INFO_V1(pgstatindexbyid);
 PG_FUNCTION_INFO_V1(pg_relpages);
 PG_FUNCTION_INFO_V1(pg_relpagesbyid);
 PG_FUNCTION_INFO_V1(pgstatginindex);
+PG_FUNCTION_INFO_V1(pgstathashindex);
 
 PG_FUNCTION_INFO_V1(pgstatindex_v1_5);
 PG_FUNCTION_INFO_V1(pgstatindexbyid_v1_5);
@@ -60,11 +63,17 @@ PG_FUNCTION_INFO_V1(pg_relpages_v1_5);
 PG_FUNCTION_INFO_V1(pg_relpagesbyid_v1_5);
 PG_FUNCTION_INFO_V1(pgstatginindex_v1_5);
 
+PG_FUNCTION_INFO_V1(pgstathashindex_v1_6);
+
 Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo);
+Datum pgstathashindex_internal(Oid relid, FunctionCallInfo fcinfo);
 
 #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
 #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
 #define IS_GIN(r) ((r)->rd_rel->relam == GIN_AM_OID)
+#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
+
+#define HASH_HEAD_BLKNO HASH_METAPAGE + 1
 
 /* ------------------------------------------------
  * A structure for a whole btree index statistics
@@ -101,7 +110,29 @@ typedef struct GinIndexStat
 	int64		pending_tuples;
 } GinIndexStat;
 
+/* ------------------------------------------------
+ * A structure for a whole HASH index statistics
+ * used by pgstathashindex().
+ * ------------------------------------------------
+ */
+typedef struct HashIndexStat
+{
+	uint32	version;
+	uint32	total_pages;
+	uint32	bucket_pages;
+	uint32	overflow_pages;
+	uint32	bitmap_pages;
+	uint32	zero_pages;
+	uint64	ntuples;
+	uint16	ffactor;
+	uint64	live_items;
+	uint64	dead_items;
+	uint64	free_space;
+} HashIndexStat;
+
 static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo);
+static void GetHashPageStats(Page page, HashIndexStat *stats);
+
 
 /* ------------------------------------------------------
  * pgstatindex()
@@ -527,3 +558,197 @@ pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo)
 
 	return (result);
 }
+
+/* ------------------------------------------------------
+ * pgstathashindex()
+ *
+ * Usage: SELECT * FROM pgstathashindex('hashindex');
+ *
+ * Must keep superuser() check, see above.
+ * ------------------------------------------------------
+ */
+Datum
+pgstathashindex(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use pgstattuple functions"))));
+
+	PG_RETURN_DATUM(pgstathashindex_internal(relid, fcinfo));
+}
+
+/* No need for superuser checks from v1.5 onwards, see above */
+Datum
+pgstathashindex_v1_6(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+
+	PG_RETURN_DATUM(pgstathashindex_internal(relid, fcinfo));
+}
+
+Datum
+pgstathashindex_internal(Oid relid, FunctionCallInfo fcinfo)
+{
+	BlockNumber	nblocks;
+	BlockNumber	blkno;
+	Relation	rel;
+	HashIndexStat stats = {0};
+	HeapTuple	tuple;
+	TupleDesc	tupleDesc;
+	Datum		values[11];
+	bool		nulls[11];
+	Datum		result;
+	Buffer		metabuf;
+	HashMetaPage	metap;
+	float8		free_percent;
+	uint64		table_len;
+
+	rel = index_open(relid, AccessShareLock);
+
+	if (!IS_HASH(rel))
+		elog(ERROR, "relation \"%s\" is not a HASH index",
+			 RelationGetRelationName(rel));
+
+	/*
+	 * Reject attempts to read non-local temporary relations; we would be
+	 * likely to get wrong data since we have no visibility into the owning
+	 * session's local buffers.
+	 */
+	if (RELATION_IS_OTHER_TEMP(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			   errmsg("cannot access temporary indexes of other sessions")));
+
+	/* Get the current relation length */
+	LockRelationForExtension(rel, ExclusiveLock);
+	nblocks = RelationGetNumberOfBlocks(rel);
+	UnlockRelationForExtension(rel, ExclusiveLock);
+
+	stats.total_pages = nblocks;
+
+	for (blkno = HASH_HEAD_BLKNO; blkno < nblocks; blkno++)
+	{
+		Buffer		buf;
+		Page		page;
+		HashPageOpaque	opaque;
+
+		CHECK_FOR_INTERRUPTS();
+
+		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL);
+		LockBuffer(buf, BUFFER_LOCK_SHARE);
+		page = (Page) BufferGetPage(buf);
+
+		if (PageIsNew(page))
+			stats.zero_pages++;
+		else if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" contains corrupted page at block %u",
+							RelationGetRelationName(rel),
+							BufferGetBlockNumber(buf)),
+			errhint("Please REINDEX it.")));
+		else
+		{
+			opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+			if (opaque->hasho_flag & LH_BUCKET_PAGE)
+			{
+				stats.bucket_pages++;
+				GetHashPageStats(page, &stats);
+			}
+			else if (opaque->hasho_flag & LH_OVERFLOW_PAGE)
+			{
+				stats.overflow_pages++;
+				GetHashPageStats(page, &stats);
+			}
+			else if (opaque->hasho_flag & LH_BITMAP_PAGE)
+				stats.bitmap_pages++;
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+					errmsg("unexpected page type 0x%04X in HASH index \"%s\" block %u",
+							opaque->hasho_flag, RelationGetRelationName(rel),
+							BufferGetBlockNumber(buf))));
+		}
+		UnlockReleaseBuffer(buf);
+	}
+
+	/* Read the metapage so we can determine things like ntuples, ffactor etc. */
+	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
+	metap = HashPageGetMeta(BufferGetPage(metabuf));
+
+	stats.ntuples = metap->hashm_ntuples;
+	stats.version = metap->hashm_version;
+	stats.ffactor = metap->hashm_ffactor;
+
+	stats.free_space += (BLCKSZ * stats.zero_pages);
+
+	/*
+	 * Let us ignore metapage and bitmap page when calculating
+	 * free space percentage for tuples in a table.
+	 */
+	table_len = (stats.total_pages - 2) * BLCKSZ;
+
+	free_percent = 100.0 * stats.free_space / table_len;
+
+	_hash_relbuf(rel, metabuf);
+
+	index_close(rel, AccessShareLock);
+
+	/*
+	 * Build a tuple descriptor for our result type
+	 */
+	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	tupleDesc = BlessTupleDesc(tupleDesc);
+
+	MemSet(nulls, 0, sizeof(nulls));
+
+	values[0] = UInt32GetDatum(stats.version);
+	values[1] = UInt32GetDatum(stats.total_pages);
+	values[2] = UInt32GetDatum(stats.bucket_pages);
+	values[3] = UInt32GetDatum(stats.overflow_pages);
+	values[4] = UInt32GetDatum(stats.bitmap_pages);
+	values[5] = UInt32GetDatum(stats.zero_pages);
+	values[6] = UInt64GetDatum(stats.ntuples);
+	values[7] = UInt16GetDatum(stats.ffactor);
+	values[8] = UInt64GetDatum(stats.live_items);
+	values[9] = UInt64GetDatum(stats.dead_items);
+	values[10] = Float8GetDatum(free_percent);
+
+	/*
+	 * Build and return the tuple
+	 */
+	tuple = heap_form_tuple(tupleDesc, values, nulls);
+	result = HeapTupleGetDatum(tuple);
+
+	return (result);
+}
+
+/* -------------------------------------------------
+ * GetHashPageStatis()
+ *
+ * Collect statistics of single hash page
+ * -------------------------------------------------
+ */
+static void
+GetHashPageStats(Page page, HashIndexStat *stats)
+{
+	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+	int off;
+
+	/* count live and dead tuples, and free space */
+	for (off = FirstOffsetNumber; off <= maxoff; off++)
+	{
+		ItemId      id = PageGetItemId(page, off);
+
+		if (!ItemIdIsDead(id))
+			stats->live_items++;
+		else
+			stats->dead_items++;
+	}
+	stats->free_space += PageGetFreeSpace(page);
+}
diff --git a/contrib/pgstattuple/pgstattuple--1.4.sql b/contrib/pgstattuple/pgstattuple--1.4.sql
index 47377eb..94318bd 100644
--- a/contrib/pgstattuple/pgstattuple--1.4.sql
+++ b/contrib/pgstattuple/pgstattuple--1.4.sql
@@ -35,6 +35,21 @@ RETURNS BIGINT
 AS 'MODULE_PATHNAME', 'pg_relpages'
 LANGUAGE C STRICT PARALLEL SAFE;
 
+CREATE FUNCTION pgstathashindex(IN relname regclass,
+    OUT version INT4,
+    OUT total_pages INT4,
+    OUT bucket_pages INT4,
+    OUT overflow_pages INT4,
+    OUT bitmap_pages INT4,
+    OUT zero_pages INT4,
+    OUT ntuples BIGINT,
+    OUT ffactor BIGINT,
+    OUT live_items BIGINT,
+    OUT dead_items BIGINT,
+    OUT free_percent FLOAT8)
+AS 'MODULE_PATHNAME', 'pgstathashindex'
+LANGUAGE C STRICT PARALLEL SAFE;
+
 /* New stuff in 1.1 begins here */
 
 CREATE FUNCTION pgstatginindex(IN relname regclass,
diff --git a/contrib/pgstattuple/pgstattuple--1.5--1.6.sql b/contrib/pgstattuple/pgstattuple--1.5--1.6.sql
new file mode 100644
index 0000000..b44ad5e
--- /dev/null
+++ b/contrib/pgstattuple/pgstattuple--1.5--1.6.sql
@@ -0,0 +1,22 @@
+/* contrib/pgstattuple/pgstattuple--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.6'" to load this file. \quit
+
+
+CREATE OR REPLACE FUNCTION pgstathashindex(IN relname regclass,
+	OUT version INT4,
+	OUT total_pages INT4,
+	OUT bucket_pages INT4,
+	OUT overflow_pages INT4,
+	OUT bitmap_pages INT4,
+	OUT zero_pages INT4,
+	OUT ntuples BIGINT,
+	OUT ffactor BIGINT,
+	OUT live_items BIGINT,
+	OUT dead_items BIGINT,
+	OUT free_percent FLOAT8)
+AS 'MODULE_PATHNAME', 'pgstathashindex_v1_6'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pgstatginindex(regclass) FROM PUBLIC;
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index 6af4075..80d0695 100644
--- a/contrib/pgstattuple/pgstattuple.control
+++ b/contrib/pgstattuple/pgstattuple.control
@@ -1,5 +1,5 @@
 # pgstattuple extension
 comment = 'show tuple-level statistics'
-default_version = '1.5'
+default_version = '1.6'
 module_pathname = '$libdir/pgstattuple'
 relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 9ada5d2..379fc79 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -345,6 +345,114 @@ pending_tuples | 0
    <varlistentry>
     <term>
      <indexterm>
+      <primary>pgstathashindex</primary>
+     </indexterm>
+     <function>pgstathashindex(regclass) returns record</>
+    </term>
+
+    <listitem>
+     <para>
+      <function>pgstathashindex</function> returns a record showing information
+      about a HASH index.  For example:
+<programlisting>
+test=&gt; select * from pgstathashindex('con_hash_index');
+ version | total_pages | bucket_pages | overflow_pages | bitmap_pages | zero_pages | ntuples | ffactor | live_items | dead_items |   free_percent   
+---------+-------------+--------------+----------------+--------------+------------+---------+---------+------------+------------+------------------
+       2 |       34781 |        17150 |           2011 |            1 |      15618 |  686000 |      40 |     942926 |          0 | 93.0849623133752
+</programlisting>
+     </para>
+
+    <para>
+     The output columns are:
+
+    <informaltable>
+     <tgroup cols="3">
+      <thead>
+       <row>
+        <entry>Column</entry>
+        <entry>Type</entry>
+        <entry>Description</entry>
+       </row>
+      </thead>
+
+      <tbody>
+       <row>
+        <entry><structfield>version</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>HASH version number</entry>
+       </row>
+
+       <row>
+        <entry><structfield>total_pages</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Total number of pages in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>bucket_pages</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Total number of bucket pages in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>overflow_pages</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Total number of overflow pages in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>bitmap_pages</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Total number of bitmap pages in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>zero_pages</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Total number of new or zero pages in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>ntuples</structfield></entry>
+        <entry><type>bigint</type></entry>
+        <entry>Total number of tuples in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>ffactor</structfield></entry>
+        <entry><type>integer</type></entry>
+        <entry>Average number of tuples per bucket</entry>
+       </row>
+
+       <row>
+        <entry><structfield>live_items</structfield></entry>
+        <entry><type>bigint</type></entry>
+        <entry>Total number of alive tuples in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>dead_tuples</structfield></entry>
+        <entry><type>bigint</type></entry>
+        <entry>Total number of dead tuples in the hash table</entry>
+       </row>
+
+       <row>
+        <entry><structfield>free_percent</structfield></entry>
+        <entry><type>float</type></entry>
+        <entry>Percentage of free space available in the hash table</entry>
+       </row>
+
+      </tbody>
+     </tgroup>
+    </informaltable>
+    </para>
+    </listitem>
+   </varlistentry>
+
+
+   <varlistentry>
+    <term>
+     <indexterm>
       <primary>pg_relpages</primary>
      </indexterm>
      <function>pg_relpages(regclass) returns bigint</>
-- 
1.8.3.1

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to