Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

Abhijit Menon-Sen Mon, 11 May 2015 04:28:06 -0700

Hi Andres.

I've attached an updated patch for pgstatbloat, as well as a patch to
replace two uses of BuildTupleFromCStrings() elsewhere in pgstattuple.

I've made the changes you mentioned in your earlier mail, except that I
have not changed the name pending further suggestions about what would
be the best name.

Also:

At 2015-05-09 15:36:49 +0530, [email protected] wrote:
>
> At 2015-05-09 02:20:51 +0200, [email protected] wrote:
> >
> > I haven't checked, but I'm not sure that it's safe/meaningful to
> > call PageGetHeapFreeSpace() on a new page.
> 
> OK, I'll check and fix if necessary.

You're right, PageGetHeapFreeSpace() isn't safe on a new page. I've
added a guard to that call in the attached patch, but I'm not sure
that's the right thing to do.

Should I copy the orphaned new-page handling from lazy_scan_heap? What
about for empty pages? Neither feels like a very good thing to do, but
then neither does skipping the new page silently. Should I count the
space it would have free if it were initialised, but leave the page
alone for VACUUM to deal with? Or just leave it as it is?

-- Abhijit

>From 421267bba8394255feed8f9b9424d25d0be9f979 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <[email protected]>
Date: Mon, 11 May 2015 15:54:48 +0530
Subject: Make pgstattuple use heap_form_tuple instead of
 BuildTupleFromCStrings

---
 contrib/pgstattuple/pgstatindex.c | 43 ++++++++++++++++++-------------------
 contrib/pgstattuple/pgstattuple.c | 45 ++++++++++++++-------------------------
 2 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index a2ea5d7..608f729 100644
--- a/contrib/pgstattuple/pgstatindex.c
+++ b/contrib/pgstattuple/pgstatindex.c
@@ -257,7 +257,8 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
 	{
 		TupleDesc	tupleDesc;
 		int			j;
-		char	   *values[10];
+		Datum		values[10];
+		bool		nulls[10];
 		HeapTuple	tuple;
 
 		/* Build a tuple descriptor for our result type */
@@ -265,33 +266,31 @@ pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo)
 			elog(ERROR, "return type must be a row type");
 
 		j = 0;
-		values[j++] = psprintf("%d", indexStat.version);
-		values[j++] = psprintf("%d", indexStat.level);
-		values[j++] = psprintf(INT64_FORMAT,
-							   (indexStat.root_pages +
-								indexStat.leaf_pages +
-								indexStat.internal_pages +
-								indexStat.deleted_pages +
-								indexStat.empty_pages) * BLCKSZ);
-		values[j++] = psprintf("%u", indexStat.root_blkno);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.internal_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.leaf_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.empty_pages);
-		values[j++] = psprintf(INT64_FORMAT, indexStat.deleted_pages);
+		values[j++] = Int32GetDatum(indexStat.version);
+		values[j++] = Int32GetDatum(indexStat.level);
+		values[j++] = Int64GetDatum((indexStat.root_pages +
+									 indexStat.leaf_pages +
+									 indexStat.internal_pages +
+									 indexStat.deleted_pages +
+									 indexStat.empty_pages) * BLCKSZ);
+		values[j++] = Int32GetDatum(indexStat.root_blkno);
+		values[j++] = Int32GetDatum(indexStat.internal_pages);
+		values[j++] = Int32GetDatum(indexStat.leaf_pages);
+		values[j++] = Int32GetDatum(indexStat.empty_pages);
+		values[j++] = Int32GetDatum(indexStat.deleted_pages);
 		if (indexStat.max_avail > 0)
-			values[j++] = psprintf("%.2f",
-								   100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
+			values[j++] = Float8GetDatum(100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
 		else
-			values[j++] = pstrdup("NaN");
+			values[j++] = CStringGetDatum("NaN");
 		if (indexStat.leaf_pages > 0)
-			values[j++] = psprintf("%.2f",
-								   (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
+			values[j++] = Float8GetDatum((double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
 		else
-			values[j++] = pstrdup("NaN");
+			values[j++] = CStringGetDatum("NaN");
 
-		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
-									   values);
+		for (j = 0; j < 10; j++)
+			nulls[j] = false;
 
+		tuple = heap_form_tuple(tupleDesc, values, nulls);
 		result = HeapTupleGetDatum(tuple);
 	}
 
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index c3a8b1d..552f188 100644
--- a/contrib/pgstattuple/pgstattuple.c
+++ b/contrib/pgstattuple/pgstattuple.c
@@ -85,28 +85,20 @@ static Datum
 build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo)
 {
 #define NCOLUMNS	9
-#define NCHARS		32
 
 	HeapTuple	tuple;
-	char	   *values[NCOLUMNS];
-	char		values_buf[NCOLUMNS][NCHARS];
+	Datum		values[NCOLUMNS];
+	bool		nulls[NCOLUMNS];
 	int			i;
 	double		tuple_percent;
 	double		dead_tuple_percent;
 	double		free_percent;	/* free/reusable space in % */
 	TupleDesc	tupdesc;
-	AttInMetadata *attinmeta;
 
 	/* Build a tuple descriptor for our result type */
 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
 
-	/*
-	 * Generate attribute metadata needed later to produce tuples from raw C
-	 * strings
-	 */
-	attinmeta = TupleDescGetAttInMetadata(tupdesc);
-
 	if (stat->table_len == 0)
 	{
 		tuple_percent = 0.0;
@@ -120,26 +112,21 @@ build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo)
 		free_percent = 100.0 * stat->free_space / stat->table_len;
 	}
 
-	/*
-	 * Prepare a values array for constructing the tuple. This should be an
-	 * array of C strings which will be processed later by the appropriate
-	 * "in" functions.
-	 */
-	for (i = 0; i < NCOLUMNS; i++)
-		values[i] = values_buf[i];
 	i = 0;
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len);
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count);
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len);
-	snprintf(values[i++], NCHARS, "%.2f", tuple_percent);
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count);
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len);
-	snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent);
-	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space);
-	snprintf(values[i++], NCHARS, "%.2f", free_percent);
-
-	/* build a tuple */
-	tuple = BuildTupleFromCStrings(attinmeta, values);
+	values[i++] = Int64GetDatum(stat->table_len);
+	values[i++] = Int64GetDatum(stat->tuple_count);
+	values[i++] = Int64GetDatum(stat->tuple_len);
+	values[i++] = Float8GetDatum(tuple_percent);
+	values[i++] = Int64GetDatum(stat->dead_tuple_count);
+	values[i++] = Int64GetDatum(stat->dead_tuple_len);
+	values[i++] = Float8GetDatum(dead_tuple_percent);
+	values[i++] = Int64GetDatum(stat->free_space);
+	values[i++] = Float8GetDatum(free_percent);
+
+	for (i = 0; i < NCOLUMNS; i++)
+		nulls[i] = false;
+
+	tuple = heap_form_tuple(tupdesc, values, nulls);
 
 	/* make the tuple into a datum */
 	return HeapTupleGetDatum(tuple);
-- 
1.9.1

>From 0be1d1d49703ff9704255495cedb91c097fbeaa0 Mon Sep 17 00:00:00 2001
From: Abhijit Menon-Sen <[email protected]>
Date: Fri, 26 Dec 2014 12:37:13 +0530
Subject: Add pgstatbloat to pgstattuple

---
 contrib/pgstattuple/Makefile                       |   4 +-
 contrib/pgstattuple/pgstatbloat.c                  | 282 +++++++++++++++++++++
 contrib/pgstattuple/pgstattuple--1.2--1.3.sql      |  18 ++
 .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} |  18 +-
 contrib/pgstattuple/pgstattuple.control            |   2 +-
 doc/src/sgml/pgstattuple.sgml                      | 135 ++++++++++
 6 files changed, 455 insertions(+), 4 deletions(-)
 create mode 100644 contrib/pgstattuple/pgstatbloat.c
 create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql
 rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (72%)

diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..d7d27a5 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c
new file mode 100644
index 0000000..1c1f388
--- /dev/null
+++ b/contrib/pgstattuple/pgstatbloat.c
@@ -0,0 +1,282 @@
+/*
+ * contrib/pgstattuple/pgstatbloat.c
+ * Fast bloat estimation functions
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstatbloat);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here. total_pages and misc_count are not returned.
+ */
+typedef struct pgstatbloat_output_type
+{
+	uint64		table_len;
+	uint64		total_pages;
+	uint64		scanned_pages;
+	uint64		tuple_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		misc_count;
+	uint64		free_space;
+} pgstatbloat_output_type;
+
+#define NUM_PGSTATBLOAT_OUTPUT_COLUMNS 10
+
+/*
+ * This function takes an already open relation and scans its pages,
+ * skipping those that have the corresponding visibility map bit set.
+ * For pages we skip, we find the free space from the free space map
+ * and approximate tuple_len on that basis. For the others, we count
+ * the exact number of dead tuples etc.
+ *
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
+ * we do not try to avoid skipping single pages.
+ */
+
+static void
+pgstatbloat_heap(Relation rel, pgstatbloat_output_type *stat)
+{
+	BlockNumber scanned,
+				nblocks,
+				blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	BufferAccessStrategy bstrategy;
+	TransactionId OldestXmin;
+
+	OldestXmin = GetOldestXmin(rel, true);
+	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+	scanned = 0;
+	nblocks = RelationGetNumberOfBlocks(rel);
+
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		Buffer		buf;
+		Page		page;
+		OffsetNumber offnum,
+					maxoff;
+		Size		freespace;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the page has only visible tuples, then we can find out the
+		 * free space from the FSM and move on.
+		 */
+
+		if (visibilitymap_test(rel, blkno, &vmbuffer))
+		{
+			freespace = GetRecordedFreeSpace(rel, blkno);
+			stat->tuple_len += BLCKSZ - freespace;
+			stat->free_space += freespace;
+			continue;
+		}
+
+		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+								 RBM_NORMAL, bstrategy);
+
+		LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+		page = BufferGetPage(buf);
+
+		if (!PageIsNew(page))
+			stat->free_space += PageGetHeapFreeSpace(page);
+
+		if (PageIsNew(page) || PageIsEmpty(page))
+		{
+			UnlockReleaseBuffer(buf);
+			continue;
+		}
+
+		scanned++;
+
+		/*
+		 * Look at each tuple on the page and decide whether it's live
+		 * or dead, then count it and its size. Unlike lazy_scan_heap,
+		 * we can afford to ignore problems and special cases.
+		 */
+
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		for (offnum = FirstOffsetNumber;
+			 offnum <= maxoff;
+			 offnum = OffsetNumberNext(offnum))
+		{
+			ItemId		itemid;
+			HeapTupleData tuple;
+
+			itemid = PageGetItemId(page, offnum);
+
+			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
+				ItemIdIsDead(itemid))
+			{
+				continue;
+			}
+
+			Assert(ItemIdIsNormal(itemid));
+
+			ItemPointerSet(&(tuple.t_self), blkno, offnum);
+
+			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple.t_len = ItemIdGetLength(itemid);
+			tuple.t_tableOid = RelationGetRelid(rel);
+
+			/*
+			 * We count live and dead tuples, but we also need to add up
+			 * others in order to feed vac_estimate_reltuples.
+			 */
+
+			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+			{
+				case HEAPTUPLE_RECENTLY_DEAD:
+					stat->misc_count++;
+					/* Fall through */
+				case HEAPTUPLE_DEAD:
+					stat->dead_tuple_len += tuple.t_len;
+					stat->dead_tuple_count++;
+					break;
+				case HEAPTUPLE_LIVE:
+					stat->tuple_len += tuple.t_len;
+					stat->tuple_count++;
+					break;
+				case HEAPTUPLE_INSERT_IN_PROGRESS:
+				case HEAPTUPLE_DELETE_IN_PROGRESS:
+					stat->misc_count++;
+					break;
+				default:
+					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+					break;
+			}
+		}
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	stat->table_len = (uint64) nblocks * BLCKSZ;
+	stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
+											   stat->tuple_count+stat->misc_count);
+	stat->total_pages = nblocks;
+	stat->scanned_pages = scanned;
+
+	if (BufferIsValid(vmbuffer))
+	{
+		ReleaseBuffer(vmbuffer);
+		vmbuffer = InvalidBuffer;
+	}
+
+	relation_close(rel, AccessShareLock);
+}
+
+/*
+ * build and return a pgstatbloat_output_type tuple
+ */
+static HeapTuple
+build_tuple(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+	TupleDesc	tupdesc;
+	bool		nulls[NUM_PGSTATBLOAT_OUTPUT_COLUMNS];
+	Datum		values[NUM_PGSTATBLOAT_OUTPUT_COLUMNS];
+	double		scanned_percent = 0;
+	double		tuple_percent = 0;
+	double		dead_tuple_percent = 0;
+	double		free_percent = 0;
+	int			i;
+
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/*
+	 * Calculate percentages.
+	 */
+
+	if (stat->total_pages != 0)
+		scanned_percent = 100 * stat->scanned_pages / stat->total_pages;
+
+	if (stat->table_len != 0)
+	{
+		tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
+		dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
+		free_percent = 100.0 * stat->free_space / stat->table_len;
+	}
+
+	i = 0;
+	values[i++] = Int64GetDatum(stat->table_len);
+	values[i++] = Float8GetDatum(scanned_percent);
+	values[i++] = Int64GetDatum(stat->tuple_count);
+	values[i++] = Int64GetDatum(stat->tuple_len);
+	values[i++] = Float8GetDatum(tuple_percent);
+	values[i++] = Int64GetDatum(stat->dead_tuple_count);
+	values[i++] = Int64GetDatum(stat->dead_tuple_len);
+	values[i++] = Float8GetDatum(dead_tuple_percent);
+	values[i++] = Int64GetDatum(stat->free_space);
+	values[i++] = Float8GetDatum(free_percent);
+
+	for (i = 0; i < NUM_PGSTATBLOAT_OUTPUT_COLUMNS; i++)
+		nulls[i] = false;
+
+	return heap_form_tuple(tupdesc, values, nulls);
+}
+
+/*
+ * Returns estimated live/dead tuple statistics for the given relid.
+ */
+
+Datum
+pgstatbloat(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	Relation	rel;
+	pgstatbloat_output_type stat = {0};
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use pgstatbloat functions"))));
+
+	rel = relation_open(relid, AccessShareLock);
+
+	/*
+	 * Reject attempts to read non-local temporary relations; we would be
+	 * likely to get wrong data since we have no visibility into the owning
+	 * session's local buffers.
+	 */
+	if (RELATION_IS_OTHER_TEMP(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot access temporary tables of other sessions")));
+
+	/*
+	 * We support only ordinary relations and materialised views,
+	 * because we depend on the visibility map and free space map
+	 * for our estimates about unscanned pages.
+	 */
+	if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+		  rel->rd_rel->relkind == RELKIND_MATVIEW))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("\"%s\" is not a table or materialized view",
+						RelationGetRelationName(rel))));
+
+	pgstatbloat_heap(rel, &stat);
+	return HeapTupleGetDatum(build_tuple(&stat, fcinfo));
+}
diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
new file mode 100644
index 0000000..e4762d6
--- /dev/null
+++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
@@ -0,0 +1,18 @@
+/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION pgstatbloat(IN reloid regclass,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT approx_free_space BIGINT,       -- estimated free space in bytes
+    OUT approx_free_percent FLOAT8)     -- free space in % (based on estimate)
+AS 'MODULE_PATHNAME', 'pgstatbloat'
+LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql
similarity index 72%
rename from contrib/pgstattuple/pgstattuple--1.2.sql
rename to contrib/pgstattuple/pgstattuple--1.3.sql
index e5fa2f5..f0e2a7d 100644
--- a/contrib/pgstattuple/pgstattuple--1.2.sql
+++ b/contrib/pgstattuple/pgstattuple--1.3.sql
@@ -1,4 +1,4 @@
-/* contrib/pgstattuple/pgstattuple--1.2.sql */
+/* contrib/pgstattuple/pgstattuple--1.3.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit
@@ -77,3 +77,19 @@ CREATE FUNCTION pg_relpages(IN relname regclass)
 RETURNS BIGINT
 AS 'MODULE_PATHNAME', 'pg_relpagesbyid'
 LANGUAGE C STRICT;
+
+/* New stuff in 1.3 begins here */
+
+CREATE FUNCTION pgstatbloat(IN reloid regclass,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT approx_free_space BIGINT,       -- estimated free space in bytes
+    OUT approx_free_percent FLOAT8)     -- free space in % (based on estimate)
+AS 'MODULE_PATHNAME', 'pgstatbloat'
+LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index a7cf47f..c03b180 100644
--- a/contrib/pgstattuple/pgstattuple.control
+++ b/contrib/pgstattuple/pgstattuple.control
@@ -1,5 +1,5 @@
 # pgstattuple extension
 comment = 'show tuple-level statistics'
-default_version = '1.2'
+default_version = '1.3'
 module_pathname = '$libdir/pgstattuple'
 relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 9cabd71..958c6f7 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -358,6 +358,140 @@ pending_tuples | 0
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term>
+     <indexterm>
+      <primary>pgstatbloat</primary>
+     </indexterm>
+     <function>pgstatbloat(regclass) returns record</>
+    </term>
+
+    <listitem>
+     <para>
+      <function>pgstatbloat</function> is a faster alternative to
+      <function>pgstattuple</function> that returns approximate results.
+      The argument is the target relation's OID.
+      For example:
+<programlisting>
+test=> SELECT * FROM pgstatbloat('pg_catalog.pg_proc'::regclass);
+-[ RECORD 1 ]--------+-------
+table_len            | 573440
+scanned_percent      | 2
+approx_tuple_count   | 2740
+approx_tuple_len     | 561210
+approx_tuple_percent | 97.87
+dead_tuple_count     | 0
+dead_tuple_len       | 0
+dead_tuple_percent   | 0
+approx_free_space    | 11996
+approx_free_percent  | 2.09
+</programlisting>
+      The output columns are described in <xref linkend="pgstatbloat-columns">.
+     </para>
+
+     <para>
+     Whereas <function>pgstattuple</function> always performs a
+     full-table scan and returns an exact count of live and dead tuples
+     (and their sizes) and free space, <function>pgstatbloat</function>
+     tries to avoid the full-table scan and returns exact dead tuple
+     statistics along with an approximation of the number and
+     size of live tuples and free space.
+     </para>
+
+     <para>
+     It does this by skipping pages that have only visible tuples
+     according to the visibility map (if a page has the corresponding VM
+     bit set, then it is assumed to contain no dead tuples). For such
+     pages, it derives the free space value from the free space map, and
+     assumes that the rest of the space on the page is taken up by live
+     tuples.
+     </para>
+ 
+     <para>
+     For pages that cannot be skipped, it scans each tuple, recording its
+     presence and size in the appropriate counters, and adding up the
+     free space on the page. At the end, it estimates the total number of
+     live tuples based on the number of pages and tuples scanned (in the
+     same way that VACUUM estimates pg_class.reltuples).
+     </para>
+
+     <table id="pgstatbloat-columns">
+      <title><function>pgstatbloat</function> Output Columns</title>
+      <tgroup cols="3">
+       <thead>
+        <row>
+         <entry>Column</entry>
+         <entry>Type</entry>
+         <entry>Description</entry>
+        </row>
+       </thead>
+ 
+       <tbody>
+        <row>
+         <entry><structfield>table_len</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Physical relation length in bytes (exact)</entry>
+        </row>
+        <row>
+         <entry><structfield>scanned_percent</structfield></entry>
+         <entry><type>float8</type></entry>
+         <entry>Percentage of table scanned</entry>
+        </row>
+        <row>
+         <entry><structfield>tuple_count</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Number of live tuples (estimated)</entry>
+        </row>
+        <row>
+         <entry><structfield>tuple_len</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Total length of live tuples in bytes (estimated)</entry>
+        </row>
+        <row>
+         <entry><structfield>tuple_percent</structfield></entry>
+         <entry><type>float8</type></entry>
+         <entry>Percentage of live tuples</entry>
+        </row>
+        <row>
+         <entry><structfield>dead_tuple_count</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Number of dead tuples (exact)</entry>
+        </row>
+        <row>
+         <entry><structfield>dead_tuple_len</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Total length of dead tuples in bytes (exact)</entry>
+        </row>
+        <row>
+         <entry><structfield>dead_tuple_percent</structfield></entry>
+         <entry><type>float8</type></entry>
+         <entry>Percentage of dead tuples</entry>
+        </row>
+        <row>
+         <entry><structfield>approx_free_space</structfield></entry>
+         <entry><type>bigint</type></entry>
+         <entry>Total free space in bytes (estimated)</entry>
+        </row>
+        <row>
+         <entry><structfield>approx_free_percent</structfield></entry>
+         <entry><type>float8</type></entry>
+         <entry>Percentage of free space</entry>
+        </row>
+ 
+       </tbody>
+      </tgroup>
+     </table>
+ 
+     <para>
+     In the above output, the free space figures may not match the
+     <function>pgstattuple</function> output exactly, because the free
+     space map gives us an exact figure, but is not guaranteed to be
+     accurate to the byte.
+     </para>
+
+    </listitem>
+   </varlistentry>
+
   </variablelist>
  </sect2>
 
@@ -366,6 +500,7 @@ pending_tuples | 0
 
   <para>
    Tatsuo Ishii and Satoshi Nagayasu
+   Abhijit Menon-Sen
   </para>
  </sect2>
 
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] a fast bloat measurement tool (was Re: Measuring relation free space)

Reply via email to