At 2015-03-31 22:43:49 +0530, a...@2ndquadrant.com wrote: > > I'm just posting this WIP patch where I've renamed fastbloat to > pgstatbloat as suggested by Tomas, and added in the documentation, and > so on.
Here's the revised version that also adds the count of RECENTLY_DEAD and INSERT/DELETE_IN_PROGRESS tuples to the call to vac_estimate_reltuples. Tomas, I agree that the build_output_tuple function could use cleaning up, but it's the same as the corresponding code in pgstattuple, and it seems to me reasonable to clean both up together in a separate patch. -- Abhijit
>From d11b84627438fca12955dfad77042be0a27c9acc Mon Sep 17 00:00:00 2001 From: Abhijit Menon-Sen <a...@2ndquadrant.com> Date: Fri, 26 Dec 2014 12:37:13 +0530 Subject: Add pgstatbloat to pgstattuple --- contrib/pgstattuple/Makefile | 4 +- contrib/pgstattuple/pgstatbloat.c | 340 +++++++++++++++++++++ contrib/pgstattuple/pgstattuple--1.2--1.3.sql | 18 ++ .../{pgstattuple--1.2.sql => pgstattuple--1.3.sql} | 18 +- contrib/pgstattuple/pgstattuple.control | 2 +- doc/src/sgml/pgstattuple.sgml | 135 ++++++++ 6 files changed, 513 insertions(+), 4 deletions(-) create mode 100644 contrib/pgstattuple/pgstatbloat.c create mode 100644 contrib/pgstattuple/pgstattuple--1.2--1.3.sql rename contrib/pgstattuple/{pgstattuple--1.2.sql => pgstattuple--1.3.sql} (73%) diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index 862585c..d7d27a5 100644 --- a/contrib/pgstattuple/Makefile +++ b/contrib/pgstattuple/Makefile @@ -1,10 +1,10 @@ # contrib/pgstattuple/Makefile MODULE_big = pgstattuple -OBJS = pgstattuple.o pgstatindex.o $(WIN32RES) +OBJS = pgstattuple.o pgstatindex.o pgstatbloat.o $(WIN32RES) EXTENSION = pgstattuple -DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql +DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql PGFILEDESC = "pgstattuple - tuple-level statistics" REGRESS = pgstattuple diff --git a/contrib/pgstattuple/pgstatbloat.c b/contrib/pgstattuple/pgstatbloat.c new file mode 100644 index 0000000..2dd1900 --- /dev/null +++ b/contrib/pgstattuple/pgstatbloat.c @@ -0,0 +1,340 @@ +/* + * contrib/pgstattuple/pgstatbloat.c + * + * Abhijit Menon-Sen <a...@2ndquadrant.com> + * Portions Copyright (c) 2001,2002 Tatsuo Ishii (from pgstattuple) + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "access/visibilitymap.h" +#include "access/transam.h" +#include "access/xact.h" +#include "access/multixact.h" +#include "access/htup_details.h" +#include "catalog/namespace.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/freespace.h" +#include "storage/procarray.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/tqual.h" +#include "commands/vacuum.h" + +PG_FUNCTION_INFO_V1(pgstatbloat); + +/* + * tuple_percent, dead_tuple_percent and free_percent are computable, + * so not defined here. + */ +typedef struct pgstatbloat_output_type +{ + uint64 table_len; + uint64 tuple_count; + uint64 misc_count; + uint64 tuple_len; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + uint64 free_space; + uint64 total_pages; + uint64 scanned_pages; +} pgstatbloat_output_type; + +static Datum build_output_type(pgstatbloat_output_type *stat, + FunctionCallInfo fcinfo); +static Datum pgstatbloat_heap(Relation rel, FunctionCallInfo fcinfo); + +/* + * build a pgstatbloat_output_type tuple + */ +static Datum +build_output_type(pgstatbloat_output_type *stat, FunctionCallInfo fcinfo) +{ +#define NCOLUMNS 10 +#define NCHARS 32 + + HeapTuple tuple; + char *values[NCOLUMNS]; + char values_buf[NCOLUMNS][NCHARS]; + int i; + double tuple_percent; + double dead_tuple_percent; + double free_percent; /* free/reusable space in % */ + double scanned_percent; + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* + * Generate attribute metadata needed later to produce tuples from raw C + * strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + if (stat->table_len == 0) + { + tuple_percent = 0.0; + dead_tuple_percent = 0.0; + free_percent = 0.0; + } + else + { + tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + free_percent = 100.0 * stat->free_space / stat->table_len; + } + + scanned_percent = 0.0; + if (stat->total_pages != 0) + { + scanned_percent = 100 * stat->scanned_pages / stat->total_pages; + } + + for (i = 0; i < NCOLUMNS; i++) + values[i] = values_buf[i]; + i = 0; + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len); + snprintf(values[i++], NCHARS, "%.2f", scanned_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len); + snprintf(values[i++], NCHARS, "%.2f", tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len); + snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space); + snprintf(values[i++], NCHARS, "%.2f", free_percent); + + tuple = BuildTupleFromCStrings(attinmeta, values); + + return HeapTupleGetDatum(tuple); +} + +/* Returns live/dead tuple statistics for the given relid. */ + +Datum +pgstatbloat(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + const char *err; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use pgstatbloat functions")))); + + rel = relation_open(relid, AccessShareLock); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + PG_RETURN_DATUM(pgstatbloat_heap(rel, fcinfo)); + case RELKIND_TOASTVALUE: + err = "toast value"; + break; + case RELKIND_SEQUENCE: + err = "sequence"; + break; + case RELKIND_INDEX: + err = "index"; + break; + case RELKIND_VIEW: + err = "view"; + break; + case RELKIND_COMPOSITE_TYPE: + err = "composite type"; + break; + case RELKIND_FOREIGN_TABLE: + err = "foreign table"; + break; + default: + err = "unknown"; + break; + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" (%s) is not supported", + RelationGetRelationName(rel), err))); +} + +/* + * This function takes an already open relation and scans its pages, + * skipping those that have the corresponding visibility map bit set. + * For pages we skip, we find the free space from the free space map + * and approximate tuple_len on that basis. For the others, we count + * the exact number of dead tuples etc. + * + * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but + * we do not try to avoid skipping single pages. + */ + +static Datum +pgstatbloat_heap(Relation rel, FunctionCallInfo fcinfo) +{ + BlockNumber scanned, + nblocks, + blkno; + Buffer vmbuffer = InvalidBuffer; + pgstatbloat_output_type stat = {0}; + BufferAccessStrategy bstrategy; + TransactionId OldestXmin; + + OldestXmin = GetOldestXmin(rel, true); + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + scanned = 0; + nblocks = RelationGetNumberOfBlocks(rel); + + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + Size freespace; + + CHECK_FOR_INTERRUPTS(); + + /* + * If the page has only visible tuples, then we can find out the + * free space from the FSM and move on. + */ + + if (visibilitymap_test(rel, blkno, &vmbuffer)) + { + freespace = GetRecordedFreeSpace(rel, blkno); + stat.tuple_len += BLCKSZ - freespace; + stat.free_space += freespace; + continue; + } + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buf); + + stat.free_space += PageGetHeapFreeSpace(page); + + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + scanned++; + + /* + * Look at each tuple on the page and decide whether it's live + * or dead, then count it and its size. Unlike lazy_scan_heap, + * we can afford to ignore problems and special cases. + */ + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleData tuple; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || + ItemIdIsDead(itemid)) + { + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + /* + * We count live and dead tuples, but we also need to add up + * others in order to feed vac_estimate_reltuples. + */ + + switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) + { + case HEAPTUPLE_RECENTLY_DEAD: + stat.misc_count++; + /* Fall through */ + case HEAPTUPLE_DEAD: + stat.dead_tuple_len += tuple.t_len; + stat.dead_tuple_count++; + break; + case HEAPTUPLE_LIVE: + stat.tuple_len += tuple.t_len; + stat.tuple_count++; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + case HEAPTUPLE_DELETE_IN_PROGRESS: + stat.misc_count++; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + } + + UnlockReleaseBuffer(buf); + } + + stat.table_len = (uint64) nblocks * BLCKSZ; + stat.tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned, + stat.tuple_count+stat.misc_count); + stat.total_pages = nblocks; + stat.scanned_pages = scanned; + + if (BufferIsValid(vmbuffer)) + { + ReleaseBuffer(vmbuffer); + vmbuffer = InvalidBuffer; + } + + relation_close(rel, AccessShareLock); + + return build_output_type(&stat, fcinfo); +} diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql new file mode 100644 index 0000000..dba7993 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql @@ -0,0 +1,18 @@ +/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION pgstatbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstatbloat' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql similarity index 73% rename from contrib/pgstattuple/pgstattuple--1.2.sql rename to contrib/pgstattuple/pgstattuple--1.3.sql index e5fa2f5..93593ee 100644 --- a/contrib/pgstattuple/pgstattuple--1.2.sql +++ b/contrib/pgstattuple/pgstattuple--1.3.sql @@ -1,4 +1,4 @@ -/* contrib/pgstattuple/pgstattuple--1.2.sql */ +/* contrib/pgstattuple/pgstattuple--1.3.sql */ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit @@ -77,3 +77,19 @@ CREATE FUNCTION pg_relpages(IN relname regclass) RETURNS BIGINT AS 'MODULE_PATHNAME', 'pg_relpagesbyid' LANGUAGE C STRICT; + +/* New stuff in 1.3 begins here */ + +CREATE FUNCTION pgstatbloat(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT free_space BIGINT, -- exact free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstatbloat' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control index a7cf47f..c03b180 100644 --- a/contrib/pgstattuple/pgstattuple.control +++ b/contrib/pgstattuple/pgstattuple.control @@ -1,5 +1,5 @@ # pgstattuple extension comment = 'show tuple-level statistics' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/pgstattuple' relocatable = true diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml index 9cabd71..2cd6cdc 100644 --- a/doc/src/sgml/pgstattuple.sgml +++ b/doc/src/sgml/pgstattuple.sgml @@ -358,6 +358,140 @@ pending_tuples | 0 </listitem> </varlistentry> + <varlistentry> + <term> + <indexterm> + <primary>pgstatbloat</primary> + </indexterm> + <function>pgstatbloat(regclass) returns record</> + </term> + + <listitem> + <para> + <function>pgstatbloat</function> is a faster alternative to + <function>pgstattuple</function> that returns approximate results. + The argument is the target relation's OID. + For example: +<programlisting> +test=> SELECT * FROM pgstatbloat('pg_catalog.pg_proc'::regclass); +-[ RECORD 1 ]--------+------- +table_len | 573440 +scanned_percent | 2 +approx_tuple_count | 2740 +approx_tuple_len | 561210 +approx_tuple_percent | 97.87 +dead_tuple_count | 0 +dead_tuple_len | 0 +dead_tuple_percent | 0 +free_space | 11996 +free_percent | 2.09 +</programlisting> + The output columns are described in <xref linkend="pgstatbloat-columns">. + </para> + + <para> + Whereas <function>pgstattuple</function> always performs a + full-table scan and returns an exact count of live and dead tuples + (and their sizes) and free space, <function>pgstatbloat</function> + tries to avoid the full-table scan and returns exact dead tuple and + free space statistics along with an approximation of the number and + size of live tuples. + </para> + + <para> + It does this by skipping pages that have only visible tuples + according to the visibility map (if a page has the corresponding VM + bit set, then it is assumed to contain no dead tuples). For such + pages, it derives the free space value from the free space map, and + assumes that the rest of the space on the page is taken up by live + tuples. + </para> + + <para> + For pages that cannot be skipped, it scans each tuple, recording its + presence and size in the appropriate counters, and adding up the + free space on the page. At the end, it estimates the total number of + live tuples based on the number of pages and tuples scanned (in the + same way that VACUUM estimates pg_class.reltuples). + </para> + + <table id="pgstatbloat-columns"> + <title><function>pgstatbloat</function> Output Columns</title> + <tgroup cols="3"> + <thead> + <row> + <entry>Column</entry> + <entry>Type</entry> + <entry>Description</entry> + </row> + </thead> + + <tbody> + <row> + <entry><structfield>table_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Physical relation length in bytes (exact)</entry> + </row> + <row> + <entry><structfield>scanned_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of table scanned</entry> + </row> + <row> + <entry><structfield>tuple_count</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Number of live tuples (estimated)</entry> + </row> + <row> + <entry><structfield>tuple_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total length of live tuples in bytes (estimated)</entry> + </row> + <row> + <entry><structfield>tuple_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of live tuples</entry> + </row> + <row> + <entry><structfield>dead_tuple_count</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Number of dead tuples (exact)</entry> + </row> + <row> + <entry><structfield>dead_tuple_len</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total length of dead tuples in bytes (exact)</entry> + </row> + <row> + <entry><structfield>dead_tuple_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of dead tuples</entry> + </row> + <row> + <entry><structfield>free_space</structfield></entry> + <entry><type>bigint</type></entry> + <entry>Total free space in bytes (exact)</entry> + </row> + <row> + <entry><structfield>free_percent</structfield></entry> + <entry><type>float8</type></entry> + <entry>Percentage of free space (exact)</entry> + </row> + + </tbody> + </tgroup> + </table> + + <para> + In the above output, the free space figures may not match the + <function>pgstattuple</function> output exactly, because the free + space map gives us an exact figure, but is not guaranteed to be + accurate. + </para> + + </listitem> + </varlistentry> + </variablelist> </sect2> @@ -366,6 +500,7 @@ pending_tuples | 0 <para> Tatsuo Ishii and Satoshi Nagayasu + Abhijit Menon-Sen </para> </sect2> -- 1.9.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers