At 2014-09-25 15:40:11 +0530, a...@2ndquadrant.com wrote:
>
> All right, then I'll post a version that addresses Amit's other
> points, adds a new file/function to pgstattuple, acquires content
> locks, and uses HeapTupleSatisfiesVacuum, hint-bit setting and all.

Sorry, I forgot to post this patch. It does what I listed above, and
seems to work fine (it's still quite a lot faster than pgstattuple
in many cases).

A couple of remaining questions:

1. I didn't change the handling of LP_DEAD items, because the way it is
   now matches what pgstattuple counts. I'm open to changing it, though.
   Maybe it makes sense to count LP_DEAD items iff lp_len != 0? Or just
   leave it as it is? I think it doesn't matter much.

2. I changed the code to acquire the content locks on the buffer, as
   discussed, but it still uses HeapTupleSatisfiesVacuum. Amit suggested
   using HeapTupleSatisfiesVisibility, but it's not clear to me why that
   would be better. I welcome advice in this matter.

   (If anything, I should use HeapTupleIsSurelyDead, which doesn't set
   any hint bits, but which I earlier rejected as too conservative in
   its dead/not-dead decisions for this purpose.)

(I've actually patched the pgstattuple.sgml docs as well, but I want to
re-read that to make sure it's up to date, and didn't want to wait to
post the code changes.)

I also didn't change the name. I figure it's easy enough to change it
everywhere once all the remaining pieces are in place.

Comments welcome.

-- Abhijit
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585c..bae80c9 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
 # contrib/pgstattuple/Makefile
 
 MODULE_big	= pgstattuple
-OBJS		= pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS		= pgstattuple.o pgstatindex.o fastbloat.o $(WIN32RES)
 
 EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
 PGFILEDESC = "pgstattuple - tuple-level statistics"
 
 REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/fastbloat.c b/contrib/pgstattuple/fastbloat.c
new file mode 100644
index 0000000..b33db14
--- /dev/null
+++ b/contrib/pgstattuple/fastbloat.c
@@ -0,0 +1,367 @@
+/*
+ * contrib/pgstattuple/fastbloat.c
+ *
+ * Abhijit Menon-Sen <a...@2ndquadrant.com>
+ * Portions Copyright (c) 2001,2002	Tatsuo Ishii (from pg_stattuple)
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(fastbloat);
+PG_FUNCTION_INFO_V1(fastbloatbyid);
+
+/*
+ * tuple_percent, dead_tuple_percent and free_percent are computable,
+ * so not defined here.
+ */
+typedef struct fastbloat_output_type
+{
+	uint64		table_len;
+	uint64		tuple_count;
+	uint64		tuple_len;
+	uint64		dead_tuple_count;
+	uint64		dead_tuple_len;
+	uint64		free_space;
+	uint64		total_pages;
+	uint64		scanned_pages;
+} fastbloat_output_type;
+
+static Datum build_output_type(fastbloat_output_type *stat,
+							   FunctionCallInfo fcinfo);
+static Datum fbstat_relation(Relation rel, FunctionCallInfo fcinfo);
+static Datum fbstat_heap(Relation rel, FunctionCallInfo fcinfo);
+
+/*
+ * build a fastbloat_output_type tuple
+ */
+static Datum
+build_output_type(fastbloat_output_type *stat, FunctionCallInfo fcinfo)
+{
+#define NCOLUMNS	10
+#define NCHARS		32
+
+	HeapTuple	tuple;
+	char	   *values[NCOLUMNS];
+	char		values_buf[NCOLUMNS][NCHARS];
+	int			i;
+	double		tuple_percent;
+	double		dead_tuple_percent;
+	double		free_percent;	/* free/reusable space in % */
+	double		scanned_percent;
+	TupleDesc	tupdesc;
+	AttInMetadata *attinmeta;
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/*
+	 * Generate attribute metadata needed later to produce tuples from raw C
+	 * strings
+	 */
+	attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+	if (stat->table_len == 0)
+	{
+		tuple_percent = 0.0;
+		dead_tuple_percent = 0.0;
+		free_percent = 0.0;
+	}
+	else
+	{
+		tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
+		dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
+		free_percent = 100.0 * stat->free_space / stat->table_len;
+	}
+
+	scanned_percent = 0.0;
+	if (stat->total_pages != 0)
+	{
+		scanned_percent = 100 * stat->scanned_pages / stat->total_pages;
+	}
+
+	for (i = 0; i < NCOLUMNS; i++)
+		values[i] = values_buf[i];
+	i = 0;
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len);
+	snprintf(values[i++], NCHARS, "%.2f", scanned_percent);
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count);
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len);
+	snprintf(values[i++], NCHARS, "%.2f", tuple_percent);
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count);
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len);
+	snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent);
+	snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space);
+	snprintf(values[i++], NCHARS, "%.2f", free_percent);
+
+	tuple = BuildTupleFromCStrings(attinmeta, values);
+
+	return HeapTupleGetDatum(tuple);
+}
+
+/* Returns live/dead tuple statistics for the named table. */
+
+Datum
+fastbloat(PG_FUNCTION_ARGS)
+{
+	text	   *relname = PG_GETARG_TEXT_P(0);
+	RangeVar   *relrv;
+	Relation	rel;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use fastbloat functions"))));
+
+	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+	rel = relation_openrv(relrv, AccessShareLock);
+
+	PG_RETURN_DATUM(fbstat_relation(rel, fcinfo));
+}
+
+/* As above, but takes a reloid instead of a relation name. */
+
+Datum
+fastbloatbyid(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	Relation	rel;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use fastbloat functions"))));
+
+	rel = relation_open(relid, AccessShareLock);
+
+	PG_RETURN_DATUM(fbstat_relation(rel, fcinfo));
+}
+
+/*
+ * A helper function to reject unsupported relation types. We depend on
+ * the visibility map to decide which pages we can skip, so we can't
+ * support indexes, for example, which don't have a VM.
+ */
+
+static Datum
+fbstat_relation(Relation rel, FunctionCallInfo fcinfo)
+{
+	const char *err;
+
+	/*
+	 * Reject attempts to read non-local temporary relations; we would be
+	 * likely to get wrong data since we have no visibility into the owning
+	 * session's local buffers.
+	 */
+	if (RELATION_IS_OTHER_TEMP(rel))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot access temporary tables of other sessions")));
+
+	switch (rel->rd_rel->relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_MATVIEW:
+			return fbstat_heap(rel, fcinfo);
+		case RELKIND_TOASTVALUE:
+			err = "toast value";
+			break;
+		case RELKIND_SEQUENCE:
+			err = "sequence";
+			break;
+		case RELKIND_INDEX:
+			err = "index";
+			break;
+		case RELKIND_VIEW:
+			err = "view";
+			break;
+		case RELKIND_COMPOSITE_TYPE:
+			err = "composite type";
+			break;
+		case RELKIND_FOREIGN_TABLE:
+			err = "foreign table";
+			break;
+		default:
+			err = "unknown";
+			break;
+	}
+
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("\"%s\" (%s) is not supported",
+					RelationGetRelationName(rel), err)));
+	return 0;
+}
+
+/*
+ * This function takes an already open relation and scans its pages,
+ * skipping those that have the corresponding visibility map bit set.
+ * For pages we skip, we find the free space from the free space map
+ * and approximate tuple_len on that basis. For the others, we count
+ * the exact number of dead tuples etc.
+ *
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
+ * we do not try to avoid skipping single pages.
+ */
+
+static Datum
+fbstat_heap(Relation rel, FunctionCallInfo fcinfo)
+{
+	BlockNumber scanned,
+				nblocks,
+				blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	fastbloat_output_type stat = {0};
+	BufferAccessStrategy bstrategy;
+	TransactionId OldestXmin;
+
+	OldestXmin = GetOldestXmin(rel, true);
+	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+	scanned = 0;
+	nblocks = RelationGetNumberOfBlocks(rel);
+
+	for (blkno = 0; blkno < nblocks; blkno++)
+	{
+		Buffer		buf;
+		Page		page;
+		OffsetNumber offnum,
+					maxoff;
+		Size		freespace;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * If the page has only visible tuples, then we can find out the
+		 * free space from the FSM and move on.
+		 */
+
+		if (visibilitymap_test(rel, blkno, &vmbuffer))
+		{
+			freespace = GetRecordedFreeSpace(rel, blkno);
+			stat.tuple_len += BLCKSZ - freespace;
+			stat.free_space += freespace;
+			continue;
+		}
+
+		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+								 RBM_NORMAL, bstrategy);
+
+		LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+		page = BufferGetPage(buf);
+
+		stat.free_space += PageGetHeapFreeSpace(page);
+
+		if (PageIsNew(page))
+		{
+			UnlockReleaseBuffer(buf);
+			continue;
+		}
+
+		scanned++;
+
+		/*
+		 * Look at each tuple on the page and decide whether it's live
+		 * or dead, then count it and its size. Unlike lazy_scan_heap,
+		 * we can afford to ignore problems and special cases.
+		 */
+
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		for (offnum = FirstOffsetNumber;
+			 offnum <= maxoff;
+			 offnum = OffsetNumberNext(offnum))
+		{
+			ItemId		itemid;
+			HeapTupleData tuple;
+
+			itemid = PageGetItemId(page, offnum);
+
+			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
+				ItemIdIsDead(itemid))
+			{
+				continue;
+			}
+
+			Assert(ItemIdIsNormal(itemid));
+
+			ItemPointerSet(&(tuple.t_self), blkno, offnum);
+
+			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple.t_len = ItemIdGetLength(itemid);
+			tuple.t_tableOid = RelationGetRelid(rel);
+
+			switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+			{
+				case HEAPTUPLE_DEAD:
+				case HEAPTUPLE_RECENTLY_DEAD:
+					stat.dead_tuple_len += tuple.t_len;
+					stat.dead_tuple_count++;
+					break;
+				case HEAPTUPLE_LIVE:
+					stat.tuple_len += tuple.t_len;
+					stat.tuple_count++;
+					break;
+				case HEAPTUPLE_INSERT_IN_PROGRESS:
+				case HEAPTUPLE_DELETE_IN_PROGRESS:
+					break;
+				default:
+					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+					break;
+			}
+		}
+
+		UnlockReleaseBuffer(buf);
+	}
+
+	stat.table_len = (uint64) nblocks * BLCKSZ;
+	stat.tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
+											  stat.tuple_count);
+	stat.total_pages = nblocks;
+	stat.scanned_pages = scanned;
+
+	if (BufferIsValid(vmbuffer))
+	{
+		ReleaseBuffer(vmbuffer);
+		vmbuffer = InvalidBuffer;
+	}
+
+	relation_close(rel, AccessShareLock);
+
+	return build_output_type(&stat, fcinfo);
+}
diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
new file mode 100644
index 0000000..8a4064e
--- /dev/null
+++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
@@ -0,0 +1,32 @@
+/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION fastbloat(IN relname text,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT free_space BIGINT,              -- exact free space in bytes
+    OUT free_percent FLOAT8)            -- free space in %
+AS 'MODULE_PATHNAME', 'fastbloat'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION fastbloat(IN reloid regclass,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT free_space BIGINT,              -- exact free space in bytes
+    OUT free_percent FLOAT8)            -- free space in %
+AS 'MODULE_PATHNAME', 'fastbloatbyid'
+LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql
similarity index 59%
rename from contrib/pgstattuple/pgstattuple--1.2.sql
rename to contrib/pgstattuple/pgstattuple--1.3.sql
index e5fa2f5..0a111e6 100644
--- a/contrib/pgstattuple/pgstattuple--1.2.sql
+++ b/contrib/pgstattuple/pgstattuple--1.3.sql
@@ -1,4 +1,4 @@
-/* contrib/pgstattuple/pgstattuple--1.2.sql */
+/* contrib/pgstattuple/pgstattuple--1.3.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit
@@ -77,3 +77,33 @@ CREATE FUNCTION pg_relpages(IN relname regclass)
 RETURNS BIGINT
 AS 'MODULE_PATHNAME', 'pg_relpagesbyid'
 LANGUAGE C STRICT;
+
+/* New stuff in 1.3 begins here */
+
+CREATE FUNCTION fastbloat(IN relname text,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT free_space BIGINT,              -- exact free space in bytes
+    OUT free_percent FLOAT8)            -- free space in %
+AS 'MODULE_PATHNAME', 'fastbloat'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION fastbloat(IN reloid regclass,
+    OUT table_len BIGINT,               -- physical table length in bytes
+    OUT scanned_percent FLOAT8,         -- what percentage of the table's pages was scanned
+    OUT approx_tuple_count BIGINT,      -- estimated number of live tuples
+    OUT approx_tuple_len BIGINT,        -- estimated total length in bytes of live tuples
+    OUT approx_tuple_percent FLOAT8,    -- live tuples in % (based on estimate)
+    OUT dead_tuple_count BIGINT,        -- exact number of dead tuples
+    OUT dead_tuple_len BIGINT,          -- exact total length in bytes of dead tuples
+    OUT dead_tuple_percent FLOAT8,      -- dead tuples in % (based on estimate)
+    OUT free_space BIGINT,              -- exact free space in bytes
+    OUT free_percent FLOAT8)            -- free space in %
+AS 'MODULE_PATHNAME', 'fastbloatbyid'
+LANGUAGE C STRICT;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to