On Wed, Jan 25, 2012 at 9:47 PM, Noah Misch <[email protected]> wrote:
>
> With all that done, run some quick benchmarks: see how "SELECT free_percent
> FROM pgstattuple(rel)" fares compared to "SELECT relation_free_space(rel)" for
> a large heap and for a large B-tree index. If the timing difference is too
> small to be interesting to you, remove relation_free_space() and submit your
> pgstattuple() improvements alone. Otherwise, submit as written.
>
Ok. I split this in three patches.
1) pgstattuple-gin_spgist.patch
This first patch adds gin and spgist support to pgstattuple, also
makes pgstattuple use a ring buffer when reading tables or indexes.
2) pgstattuple-relation_free_space.patch
This patch adds the relation_free_space function to pgstattuple.
the function relation_free_space() is faster than pgstattuple(), to
test that i initialize pgbench with a scale of 40.
In that context pgstattuple() tooks 1.4s to process pgbench_account
table and relation_free_space() tooks 730ms (half the time!)
In the index the difference is less notorious, 170ms the former and
150ms the latter.
3) pgstattuple-stats_target.patch
This patch adds a stats_target parameter to the relation_free_space()
function, it mimics the way analyze choose the blocks to read and is
faster than plain relation_free_space() but of course could be inexact
if the pages that we don't read are the ones with more free space
--
Jaime Casanova www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index beff1b9..9f2ec1f 100644
*** a/contrib/pgstattuple/pgstatindex.c
--- b/contrib/pgstattuple/pgstatindex.c
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 95,100 ****
--- 95,101 ----
BlockNumber nblocks;
BlockNumber blkno;
BTIndexStat indexStat;
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
if (!superuser())
ereport(ERROR,
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 122,128 ****
* Read metapage
*/
{
! Buffer buffer = ReadBuffer(rel, 0);
Page page = BufferGetPage(buffer);
BTMetaPageData *metad = BTPageGetMeta(page);
--- 123,129 ----
* Read metapage
*/
{
! Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy);
Page page = BufferGetPage(buffer);
BTMetaPageData *metad = BTPageGetMeta(page);
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 159,165 ****
CHECK_FOR_INTERRUPTS();
/* Read and lock buffer */
! buffer = ReadBuffer(rel, blkno);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
--- 160,166 ----
CHECK_FOR_INTERRUPTS();
/* Read and lock buffer */
! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index e5ddd87..6bbc957 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
***************
*** 24,33 ****
--- 24,35 ----
#include "postgres.h"
+ #include "access/gin_private.h"
#include "access/gist_private.h"
#include "access/hash.h"
#include "access/nbtree.h"
#include "access/relscan.h"
+ #include "access/spgist_private.h"
#include "catalog/namespace.h"
#include "funcapi.h"
#include "miscadmin.h"
*************** static void pgstat_hash_page(pgstattuple
*** 73,83 ****
--- 75,96 ----
Relation rel, BlockNumber blkno);
static void pgstat_gist_page(pgstattuple_type *stat,
Relation rel, BlockNumber blkno);
+ static void pgstat_gin_page(pgstattuple_type *stat,
+ Relation rel, BlockNumber blkno);
+ static void pgstat_spgist_page(pgstattuple_type *stat,
+ Relation rel, BlockNumber blkno);
static Datum pgstat_index(Relation rel, BlockNumber start,
pgstat_page pagefn, FunctionCallInfo fcinfo);
static void pgstat_index_page(pgstattuple_type *stat, Page page,
OffsetNumber minoff, OffsetNumber maxoff);
+ /*
+ * Buffer access strategy for reading relations, it's simpler to keep it
+ * global because pgstat_*_page() functions read one buffer at a time.
+ * pgstat_heap() and pgstat_index() should initialize it before use.
+ */
+ BufferAccessStrategy bstrategy;
+
/*
* build_pgstattuple_type -- build a pgstattuple_type tuple
*/
*************** pgstat_relation(Relation rel, FunctionCa
*** 229,235 ****
return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
pgstat_gist_page, fcinfo);
case GIN_AM_OID:
! err = "gin index";
break;
default:
err = "unknown index";
--- 242,253 ----
return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
pgstat_gist_page, fcinfo);
case GIN_AM_OID:
! return pgstat_index(rel, GIN_METAPAGE_BLKNO + 1,
! pgstat_gin_page, fcinfo);
! break;
! case SPGIST_AM_OID:
! return pgstat_index(rel, SPGIST_METAPAGE_BLKNO + 1,
! pgstat_spgist_page, fcinfo);
break;
default:
err = "unknown index";
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 276,281 ****
--- 294,302 ----
nblocks = scan->rs_nblocks; /* # blocks to be scanned */
+ /* prepare access strategy for this table */
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
/* scan the relation */
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 309,315 ****
{
CHECK_FOR_INTERRUPTS();
! buffer = ReadBuffer(rel, block);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
UnlockReleaseBuffer(buffer);
--- 330,336 ----
{
CHECK_FOR_INTERRUPTS();
! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
UnlockReleaseBuffer(buffer);
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 322,328 ****
{
CHECK_FOR_INTERRUPTS();
! buffer = ReadBuffer(rel, block);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
UnlockReleaseBuffer(buffer);
--- 343,349 ----
{
CHECK_FOR_INTERRUPTS();
! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
UnlockReleaseBuffer(buffer);
*************** pgstat_btree_page(pgstattuple_type *stat
*** 345,351 ****
Buffer buf;
Page page;
! buf = ReadBuffer(rel, blkno);
LockBuffer(buf, BT_READ);
page = BufferGetPage(buf);
--- 366,372 ----
Buffer buf;
Page page;
! buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBuffer(buf, BT_READ);
page = BufferGetPage(buf);
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 389,395 ****
Page page;
_hash_getlock(rel, blkno, HASH_SHARE);
! buf = _hash_getbuf(rel, blkno, HASH_READ, 0);
page = BufferGetPage(buf);
if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData)))
--- 410,416 ----
Page page;
_hash_getlock(rel, blkno, HASH_SHARE);
! buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy);
page = BufferGetPage(buf);
if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData)))
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 431,437 ****
Buffer buf;
Page page;
! buf = ReadBuffer(rel, blkno);
LockBuffer(buf, GIST_SHARE);
gistcheckpage(rel, buf);
page = BufferGetPage(buf);
--- 452,458 ----
Buffer buf;
Page page;
! buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBuffer(buf, GIST_SHARE);
gistcheckpage(rel, buf);
page = BufferGetPage(buf);
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 450,455 ****
--- 471,538 ----
}
/*
+ * pgstat_gin_page -- check tuples in a gin page
+ */
+ static void
+ pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
+ {
+ Buffer buf;
+ Page page;
+
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ LockBuffer(buf, GIN_SHARE);
+ page = BufferGetPage(buf);
+
+ if (GinPageIsDeleted(page))
+ {
+ /* recyclable page */
+ stat->free_space += BLCKSZ;
+ }
+ else if (GinPageIsLeaf(page) || GinPageIsData(page))
+ {
+ pgstat_index_page(stat, page, FirstOffsetNumber,
+ PageGetMaxOffsetNumber(page));
+ }
+ else
+ {
+ /* root or node */
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ /*
+ * pgstat_spgist_page -- check tuples in a spgist page
+ */
+ static void
+ pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
+ {
+ Buffer buf;
+ Page page;
+
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ LockBuffer(buf, BUFFER_LOCK_SHARE);
+ page = BufferGetPage(buf);
+
+ if (SpGistPageIsDeleted(page))
+ {
+ /* recyclable page */
+ stat->free_space += BLCKSZ;
+ }
+ else if (SpGistPageIsLeaf(page))
+ {
+ pgstat_index_page(stat, page, FirstOffsetNumber,
+ PageGetMaxOffsetNumber(page));
+ }
+ else
+ {
+ /* root or node */
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ /*
* pgstat_index -- returns live/dead tuples info in a generic index
*/
static Datum
*************** pgstat_index(Relation rel, BlockNumber s
*** 460,465 ****
--- 543,551 ----
BlockNumber blkno;
pgstattuple_type stat = {0};
+ /* prepare access strategy for this index */
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
blkno = start;
for (;;)
{
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 6ac2775..75c4405 100644
*** a/contrib/pgstattuple/Makefile
--- b/contrib/pgstattuple/Makefile
*************** MODULE_big = pgstattuple
*** 4,10 ****
OBJS = pgstattuple.o pgstatindex.o
EXTENSION = pgstattuple
! DATA = pgstattuple--1.0.sql pgstattuple--unpackaged--1.0.sql
REGRESS = pgstattuple
--- 4,13 ----
OBJS = pgstattuple.o pgstatindex.o
EXTENSION = pgstattuple
! DATA = pgstattuple--1.0.sql pgstattuple--1.1.sql \
! pgstattuple--1.0--1.1.sql \
! pgstattuple--unpackaged--1.0.sql
!
REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
index ...d7ad6ca .
*** a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
***************
*** 0 ****
--- 1,12 ----
+ /* contrib/pgstattuple/pgstattuple--1.0--1.1.sql */
+
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.1'" to load this file. \quit
+
+ --
+ -- relation_free_space()
+ --
+ CREATE FUNCTION relation_free_space(IN relname text)
+ RETURNS real
+ AS 'MODULE_PATHNAME', 'relation_free_space'
+ LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql
index ...d94c20f .
*** a/contrib/pgstattuple/pgstattuple--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.1.sql
***************
*** 0 ****
--- 1,57 ----
+ /* contrib/pgstattuple/pgstattuple--1.0.sql */
+
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit
+
+ CREATE FUNCTION pgstattuple(IN relname text,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT tuple_count BIGINT, -- number of live tuples
+ OUT tuple_len BIGINT, -- total tuples length in bytes
+ OUT tuple_percent FLOAT8, -- live tuples in %
+ OUT dead_tuple_count BIGINT, -- number of dead tuples
+ OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in %
+ OUT free_space BIGINT, -- free space in bytes
+ OUT free_percent FLOAT8) -- free space in %
+ AS 'MODULE_PATHNAME', 'pgstattuple'
+ LANGUAGE C STRICT;
+
+ CREATE FUNCTION pgstattuple(IN reloid oid,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT tuple_count BIGINT, -- number of live tuples
+ OUT tuple_len BIGINT, -- total tuples length in bytes
+ OUT tuple_percent FLOAT8, -- live tuples in %
+ OUT dead_tuple_count BIGINT, -- number of dead tuples
+ OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in %
+ OUT free_space BIGINT, -- free space in bytes
+ OUT free_percent FLOAT8) -- free space in %
+ AS 'MODULE_PATHNAME', 'pgstattuplebyid'
+ LANGUAGE C STRICT;
+
+ CREATE FUNCTION pgstatindex(IN relname text,
+ OUT version INT,
+ OUT tree_level INT,
+ OUT index_size BIGINT,
+ OUT root_block_no BIGINT,
+ OUT internal_pages BIGINT,
+ OUT leaf_pages BIGINT,
+ OUT empty_pages BIGINT,
+ OUT deleted_pages BIGINT,
+ OUT avg_leaf_density FLOAT8,
+ OUT leaf_fragmentation FLOAT8)
+ AS 'MODULE_PATHNAME', 'pgstatindex'
+ LANGUAGE C STRICT;
+
+ CREATE FUNCTION pg_relpages(IN relname text)
+ RETURNS BIGINT
+ AS 'MODULE_PATHNAME', 'pg_relpages'
+ LANGUAGE C STRICT;
+
+ --
+ -- relation_free_space()
+ --
+ CREATE FUNCTION relation_free_space(IN relname text)
+ RETURNS real
+ AS 'MODULE_PATHNAME', 'relation_free_space'
+ LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 89ec4da..1001035 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
*************** PG_MODULE_MAGIC;
*** 43,51 ****
--- 43,53 ----
PG_FUNCTION_INFO_V1(pgstattuple);
PG_FUNCTION_INFO_V1(pgstattuplebyid);
+ PG_FUNCTION_INFO_V1(relation_free_space);
extern Datum pgstattuple(PG_FUNCTION_ARGS);
extern Datum pgstattuplebyid(PG_FUNCTION_ARGS);
+ extern Datum relation_free_space(PG_FUNCTION_ARGS);
/*
* struct pgstattuple_type
*************** typedef struct pgstattuple_type
*** 63,93 ****
uint64 free_space; /* free/reusable space in bytes */
} pgstattuple_type;
! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber);
static Datum build_pgstattuple_type(pgstattuple_type *stat,
FunctionCallInfo fcinfo);
static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo);
static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo);
static void pgstat_btree_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno);
static void pgstat_hash_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno);
static void pgstat_gist_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno);
static void pgstat_gin_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno);
static void pgstat_spgist_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno);
static Datum pgstat_index(Relation rel, BlockNumber start,
pgstat_page pagefn, FunctionCallInfo fcinfo);
static void pgstat_index_page(pgstattuple_type *stat, Page page,
! OffsetNumber minoff, OffsetNumber maxoff);
/*
* Buffer access strategy for reading relations, it's simpler to keep it
* global because pgstat_*_page() functions read one buffer at a time.
! * pgstat_heap() and pgstat_index() should initialize it before use.
*/
BufferAccessStrategy bstrategy;
--- 65,98 ----
uint64 free_space; /* free/reusable space in bytes */
} pgstattuple_type;
! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber, bool);
static Datum build_pgstattuple_type(pgstattuple_type *stat,
FunctionCallInfo fcinfo);
static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo);
static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo);
static void pgstat_btree_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno, bool enable_counters);
static void pgstat_hash_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno, bool enable_counters);
static void pgstat_gist_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno, bool enable_counters);
static void pgstat_gin_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno, bool enable_counters);
static void pgstat_spgist_page(pgstattuple_type *stat,
! Relation rel, BlockNumber blkno, bool enable_counters);
static Datum pgstat_index(Relation rel, BlockNumber start,
pgstat_page pagefn, FunctionCallInfo fcinfo);
static void pgstat_index_page(pgstattuple_type *stat, Page page,
! OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
!
! static float4 GetHeapRelationFreeSpace(Relation rel);
! static float4 GetIndexRelationFreeSpace(Relation rel);
/*
* Buffer access strategy for reading relations, it's simpler to keep it
* global because pgstat_*_page() functions read one buffer at a time.
! * every consumer of bstrategy should initialize it before use.
*/
BufferAccessStrategy bstrategy;
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 362,368 ****
* pgstat_btree_page -- check tuples in a btree page
*/
static void
! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
{
Buffer buf;
Page page;
--- 367,373 ----
* pgstat_btree_page -- check tuples in a btree page
*/
static void
! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
{
Buffer buf;
Page page;
*************** pgstat_btree_page(pgstattuple_type *stat
*** 390,396 ****
else if (P_ISLEAF(opaque))
{
pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque),
! PageGetMaxOffsetNumber(page));
}
else
{
--- 395,401 ----
else if (P_ISLEAF(opaque))
{
pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque),
! PageGetMaxOffsetNumber(page), enable_counters);
}
else
{
*************** pgstat_btree_page(pgstattuple_type *stat
*** 405,411 ****
* pgstat_hash_page -- check tuples in a hash page
*/
static void
! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
{
Buffer buf;
Page page;
--- 410,416 ----
* pgstat_hash_page -- check tuples in a hash page
*/
static void
! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
{
Buffer buf;
Page page;
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 427,433 ****
case LH_BUCKET_PAGE:
case LH_OVERFLOW_PAGE:
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page));
break;
case LH_BITMAP_PAGE:
case LH_META_PAGE:
--- 432,438 ----
case LH_BUCKET_PAGE:
case LH_OVERFLOW_PAGE:
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page), enable_counters);
break;
case LH_BITMAP_PAGE:
case LH_META_PAGE:
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 448,454 ****
* pgstat_gist_page -- check tuples in a gist page
*/
static void
! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
{
Buffer buf;
Page page;
--- 453,459 ----
* pgstat_gist_page -- check tuples in a gist page
*/
static void
! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
{
Buffer buf;
Page page;
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 461,467 ****
if (GistPageIsLeaf(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page));
}
else
{
--- 466,472 ----
if (GistPageIsLeaf(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page), enable_counters);
}
else
{
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 475,481 ****
* pgstat_gin_page -- check tuples in a gin page
*/
static void
! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
{
Buffer buf;
Page page;
--- 480,486 ----
* pgstat_gin_page -- check tuples in a gin page
*/
static void
! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
{
Buffer buf;
Page page;
*************** pgstat_gin_page(pgstattuple_type *stat,
*** 492,498 ****
else if (GinPageIsLeaf(page) || GinPageIsData(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page));
}
else
{
--- 497,503 ----
else if (GinPageIsLeaf(page) || GinPageIsData(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page), enable_counters);
}
else
{
*************** pgstat_gin_page(pgstattuple_type *stat,
*** 506,512 ****
* pgstat_spgist_page -- check tuples in a spgist page
*/
static void
! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
{
Buffer buf;
Page page;
--- 511,517 ----
* pgstat_spgist_page -- check tuples in a spgist page
*/
static void
! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
{
Buffer buf;
Page page;
*************** pgstat_spgist_page(pgstattuple_type *sta
*** 523,529 ****
else if (SpGistPageIsLeaf(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page));
}
else
{
--- 528,534 ----
else if (SpGistPageIsLeaf(page))
{
pgstat_index_page(stat, page, FirstOffsetNumber,
! PageGetMaxOffsetNumber(page), enable_counters);
}
else
{
*************** pgstat_index(Relation rel, BlockNumber s
*** 567,573 ****
{
CHECK_FOR_INTERRUPTS();
! pagefn(&stat, rel, blkno);
}
}
--- 572,578 ----
{
CHECK_FOR_INTERRUPTS();
! pagefn(&stat, rel, blkno, true);
}
}
*************** pgstat_index(Relation rel, BlockNumber s
*** 581,605 ****
*/
static void
pgstat_index_page(pgstattuple_type *stat, Page page,
! OffsetNumber minoff, OffsetNumber maxoff)
{
OffsetNumber i;
stat->free_space += PageGetFreeSpace(page);
! for (i = minoff; i <= maxoff; i = OffsetNumberNext(i))
{
! ItemId itemid = PageGetItemId(page, i);
! if (ItemIdIsDead(itemid))
{
! stat->dead_tuple_count++;
! stat->dead_tuple_len += ItemIdGetLength(itemid);
}
! else
{
! stat->tuple_count++;
! stat->tuple_len += ItemIdGetLength(itemid);
}
}
}
--- 586,808 ----
*/
static void
pgstat_index_page(pgstattuple_type *stat, Page page,
! OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters)
{
OffsetNumber i;
stat->free_space += PageGetFreeSpace(page);
! if (enable_counters)
{
! for (i = minoff; i <= maxoff; i = OffsetNumberNext(i))
! {
! ItemId itemid = PageGetItemId(page, i);
!
! if (ItemIdIsDead(itemid))
! {
! stat->dead_tuple_count++;
! stat->dead_tuple_len += ItemIdGetLength(itemid);
! }
! else
! {
! stat->tuple_count++;
! stat->tuple_len += ItemIdGetLength(itemid);
! }
! }
! }
! }
! /*
! * relation_free_space
! *
! * Returns the percentage of free space for a given relation.
! */
! Datum
! relation_free_space(PG_FUNCTION_ARGS)
! {
! text *relname = PG_GETARG_TEXT_P(0);
! RangeVar *relrv;
! Relation rel;
! float4 free_space = 0;
!
! relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
! rel = relation_openrv(relrv, AccessShareLock);
!
! /* Only allow tables or indexes */
! if (rel->rd_rel->relkind != RELKIND_INDEX &&
! rel->rd_rel->relkind != RELKIND_RELATION &&
! rel->rd_rel->relkind != RELKIND_TOASTVALUE)
! ereport(ERROR,
! (errcode(ERRCODE_WRONG_OBJECT_TYPE),
! errmsg("cannot get free space from object \"%s\"",
! RelationGetRelationName(rel))));
!
! /*
! * Reject attempts to read non-local temporary relations. We would be
! * likely to get wrong data, since we have no visibility into the owning
! * session's local buffers.
! */
! if (RELATION_IS_OTHER_TEMP(rel))
! ereport(ERROR,
! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! errmsg("cannot access temporary tables of other sessions")));
!
! switch (rel->rd_rel->relkind)
! {
! case RELKIND_RELATION:
! case RELKIND_TOASTVALUE:
! free_space = GetHeapRelationFreeSpace(rel);
! break;
! case RELKIND_INDEX:
! free_space = GetIndexRelationFreeSpace(rel);
! break;
! default:
! /* we are not supposed to get here */
! ereport(ERROR,
! (errcode(ERRCODE_WRONG_OBJECT_TYPE),
! errmsg("cannot get free space from object \"%s\"",
! RelationGetRelationName(rel))));
! }
! relation_close(rel, AccessShareLock);
!
! PG_RETURN_FLOAT4(free_space);
! }
!
! /*
! * GetHeapRelationFreeSpace()
! *
! * Get the free space for a heap relation.
! * This is a helper function for relation_free_space()
! */
! static float4
! GetHeapRelationFreeSpace(Relation rel)
! {
! Buffer buffer;
! Page page;
! BlockNumber blkno = 0;
! BlockNumber nblocks;
! Size free_space = 0;
! double free_percent = 0;
!
! /* prepare access strategy for reading the table */
! bstrategy = GetAccessStrategy(BAS_BULKREAD);
!
! for (;;)
! {
! /* Get the current relation length */
! LockRelationForExtension(rel, ExclusiveLock);
! nblocks = RelationGetNumberOfBlocks(rel);
! UnlockRelationForExtension(rel, ExclusiveLock);
!
! /* Quit if we've scanned the whole relation */
! if (blkno >= nblocks)
{
! break;
}
!
! for (; blkno < nblocks; blkno++)
{
! CHECK_FOR_INTERRUPTS();
!
! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
! LockBuffer(buffer, BUFFER_LOCK_SHARE);
!
! page = BufferGetPage(buffer);
! free_space += PageGetHeapFreeSpace(page);
!
! UnlockReleaseBuffer(buffer);
}
}
+
+ if (nblocks > 0)
+ free_percent = ((float4) free_space) / (nblocks * BLCKSZ);
+
+ return free_percent;
+ }
+
+ /*------------------------------------------------
+ * GetIndexRelationFreeSpace
+ *
+ * Look the type of the index and call the appropiate
+ * function to get the free space
+ *------------------------------------------------
+ */
+ static float4
+ GetIndexRelationFreeSpace(Relation rel)
+ {
+ BlockNumber nblocks;
+ BlockNumber blkno;
+ pgstattuple_type stat = {0};
+ double free_percent = 0;
+
+ /* prepare access strategy for reading the index */
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+ /* start at block */
+ switch (rel->rd_rel->relam)
+ {
+ case BTREE_AM_OID:
+ blkno = BTREE_METAPAGE + 1;
+ break;
+ case HASH_AM_OID:
+ blkno = HASH_METAPAGE + 1;
+ break;
+ case GIST_AM_OID:
+ blkno = GIST_ROOT_BLKNO + 1;
+ break;
+ case GIN_AM_OID:
+ blkno = GIN_METAPAGE_BLKNO + 1;
+ break;
+ case SPGIST_AM_OID:
+ blkno = SPGIST_METAPAGE_BLKNO + 1;
+ break;
+ default:
+ blkno = 0;
+ }
+
+ for (;;)
+ {
+ /* Get the current relation length */
+ LockRelationForExtension(rel, ExclusiveLock);
+ nblocks = RelationGetNumberOfBlocks(rel);
+ UnlockRelationForExtension(rel, ExclusiveLock);
+
+ /* Quit if we've scanned the whole relation */
+ if (blkno >= nblocks)
+ {
+ break;
+ }
+
+ for (; blkno < nblocks; blkno++)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ switch (rel->rd_rel->relam)
+ {
+ case BTREE_AM_OID:
+ pgstat_btree_page(&stat, rel, blkno, false);
+ break;
+ case HASH_AM_OID:
+ pgstat_hash_page(&stat, rel, blkno, false);
+ break;
+ case GIST_AM_OID:
+ pgstat_gist_page(&stat, rel, blkno, false);
+ break;
+ case GIN_AM_OID:
+ pgstat_gin_page(&stat, rel, blkno, false);
+ break;
+ case SPGIST_AM_OID:
+ pgstat_spgist_page(&stat, rel, blkno, false);
+ break;
+ default:
+ elog(ERROR, "Unknown index type %d", rel->rd_rel->relam);
+ break;
+ }
+ }
+ }
+
+ if (nblocks > 0)
+ free_percent = ((float4) stat.free_space) / (nblocks * BLCKSZ);
+
+ return free_percent;
}
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index 7b5129b..fcfd36f 100644
*** a/contrib/pgstattuple/pgstattuple.control
--- b/contrib/pgstattuple/pgstattuple.control
***************
*** 1,5 ****
# pgstattuple extension
comment = 'show tuple-level statistics'
! default_version = '1.0'
module_pathname = '$libdir/pgstattuple'
relocatable = true
--- 1,5 ----
# pgstattuple extension
comment = 'show tuple-level statistics'
! default_version = '1.1'
module_pathname = '$libdir/pgstattuple'
relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index a55b35c..4e99a54 100644
*** a/doc/src/sgml/pgstattuple.sgml
--- b/doc/src/sgml/pgstattuple.sgml
*************** leaf_fragmentation | 0
*** 256,261 ****
--- 256,277 ----
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <function>relation_free_space(relname text) returns float</function>
+ </term>
+
+ <listitem>
+ <para>
+ <function>relation_free_space</function> returns a factor that represents
+ the total amount of free space in a table or index. The entire relation
+ is scanned in order to compute this amount, which may generate a large
+ amount of disk reads.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql
index d94c20f..08507c1 100644
*** a/contrib/pgstattuple/pgstattuple--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.1.sql
*************** LANGUAGE C STRICT;
*** 51,57 ****
--
-- relation_free_space()
--
! CREATE FUNCTION relation_free_space(IN relname text)
RETURNS real
AS 'MODULE_PATHNAME', 'relation_free_space'
LANGUAGE C STRICT;
--- 51,57 ----
--
-- relation_free_space()
--
! CREATE FUNCTION relation_free_space(IN relname text, IN stats_target int DEFAULT 100)
RETURNS real
AS 'MODULE_PATHNAME', 'relation_free_space'
LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 1001035..8649b6a 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
*************** static Datum pgstat_index(Relation rel,
*** 86,93 ****
static void pgstat_index_page(pgstattuple_type *stat, Page page,
OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
! static float4 GetHeapRelationFreeSpace(Relation rel);
! static float4 GetIndexRelationFreeSpace(Relation rel);
/*
* Buffer access strategy for reading relations, it's simpler to keep it
--- 86,94 ----
static void pgstat_index_page(pgstattuple_type *stat, Page page,
OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
! static float4 GetHeapRelationFreeSpace(Relation rel, int32 stats_target);
! static float4 GetIndexRelationFreeSpace(Relation rel, int32 stats_target);
! static BlockNumber get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target);
/*
* Buffer access strategy for reading relations, it's simpler to keep it
*************** Datum
*** 621,630 ****
--- 622,637 ----
relation_free_space(PG_FUNCTION_ARGS)
{
text *relname = PG_GETARG_TEXT_P(0);
+ int32 stats_target = PG_GETARG_INT32(1);
RangeVar *relrv;
Relation rel;
float4 free_space = 0;
+ if (stats_target < 1 || stats_target > 100)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("parameter stats_target should be between 1 and 100")));
+
relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
rel = relation_openrv(relrv, AccessShareLock);
*************** relation_free_space(PG_FUNCTION_ARGS)
*** 651,660 ****
{
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
! free_space = GetHeapRelationFreeSpace(rel);
break;
case RELKIND_INDEX:
! free_space = GetIndexRelationFreeSpace(rel);
break;
default:
/* we are not supposed to get here */
--- 658,667 ----
{
case RELKIND_RELATION:
case RELKIND_TOASTVALUE:
! free_space = GetHeapRelationFreeSpace(rel, stats_target);
break;
case RELKIND_INDEX:
! free_space = GetIndexRelationFreeSpace(rel, stats_target);
break;
default:
/* we are not supposed to get here */
*************** relation_free_space(PG_FUNCTION_ARGS)
*** 675,686 ****
* This is a helper function for relation_free_space()
*/
static float4
! GetHeapRelationFreeSpace(Relation rel)
{
Buffer buffer;
Page page;
BlockNumber blkno = 0;
BlockNumber nblocks;
Size free_space = 0;
double free_percent = 0;
--- 682,695 ----
* This is a helper function for relation_free_space()
*/
static float4
! GetHeapRelationFreeSpace(Relation rel, int32 stats_target)
{
Buffer buffer;
Page page;
BlockNumber blkno = 0;
BlockNumber nblocks;
+ BlockNumber totalBlocksInRelation;
+ int blocks_read = 0;
Size free_space = 0;
double free_percent = 0;
*************** GetHeapRelationFreeSpace(Relation rel)
*** 691,716 ****
{
/* Get the current relation length */
LockRelationForExtension(rel, ExclusiveLock);
! nblocks = RelationGetNumberOfBlocks(rel);
UnlockRelationForExtension(rel, ExclusiveLock);
/* Quit if we've scanned the whole relation */
if (blkno >= nblocks)
{
break;
}
! for (; blkno < nblocks; blkno++)
{
CHECK_FOR_INTERRUPTS();
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
free_space += PageGetHeapFreeSpace(page);
UnlockReleaseBuffer(buffer);
}
}
--- 700,733 ----
{
/* Get the current relation length */
LockRelationForExtension(rel, ExclusiveLock);
! totalBlocksInRelation = RelationGetNumberOfBlocks(rel);
UnlockRelationForExtension(rel, ExclusiveLock);
+ nblocks = totalBlocksInRelation * stats_target / 100;
+
/* Quit if we've scanned the whole relation */
if (blkno >= nblocks)
{
break;
}
! for (; blkno < nblocks; )
{
CHECK_FOR_INTERRUPTS();
buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ blocks_read++;
page = BufferGetPage(buffer);
free_space += PageGetHeapFreeSpace(page);
UnlockReleaseBuffer(buffer);
+
+ if (stats_target == 100)
+ blkno++;
+ else
+ blkno = get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target);
}
}
*************** GetHeapRelationFreeSpace(Relation rel)
*** 728,737 ****
*------------------------------------------------
*/
static float4
! GetIndexRelationFreeSpace(Relation rel)
{
- BlockNumber nblocks;
BlockNumber blkno;
pgstattuple_type stat = {0};
double free_percent = 0;
--- 745,756 ----
*------------------------------------------------
*/
static float4
! GetIndexRelationFreeSpace(Relation rel, int32 stats_target)
{
BlockNumber blkno;
+ BlockNumber nblocks;
+ BlockNumber totalBlocksInRelation;
+ int blocks_read = 0;
pgstattuple_type stat = {0};
double free_percent = 0;
*************** GetIndexRelationFreeSpace(Relation rel)
*** 764,779 ****
{
/* Get the current relation length */
LockRelationForExtension(rel, ExclusiveLock);
! nblocks = RelationGetNumberOfBlocks(rel);
UnlockRelationForExtension(rel, ExclusiveLock);
/* Quit if we've scanned the whole relation */
if (blkno >= nblocks)
{
break;
}
! for (; blkno < nblocks; blkno++)
{
CHECK_FOR_INTERRUPTS();
--- 783,800 ----
{
/* Get the current relation length */
LockRelationForExtension(rel, ExclusiveLock);
! totalBlocksInRelation = RelationGetNumberOfBlocks(rel);
UnlockRelationForExtension(rel, ExclusiveLock);
+ nblocks = totalBlocksInRelation * stats_target / 100;
+
/* Quit if we've scanned the whole relation */
if (blkno >= nblocks)
{
break;
}
! for (; blkno < nblocks; )
{
CHECK_FOR_INTERRUPTS();
*************** GetIndexRelationFreeSpace(Relation rel)
*** 798,803 ****
--- 819,830 ----
elog(ERROR, "Unknown index type %d", rel->rd_rel->relam);
break;
}
+ blocks_read++;
+
+ if (stats_target == 100)
+ blkno++;
+ else
+ get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target);
}
}
*************** GetIndexRelationFreeSpace(Relation rel)
*** 806,808 ****
--- 833,870 ----
return free_percent;
}
+
+ static BlockNumber
+ get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target)
+ {
+ BlockNumber K = nblocks - blkno; /* remaining blocks */
+ int k = (nblocks * stats_target / 100) - blocks_read; /* blocks still to sample */
+ double p; /* probability to skip block */
+ double V;
+
+ if (k > K)
+ {
+ /* need all the rest */
+ return blkno + 1;
+ }
+
+ V = ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+ p = 1.0 - (double) k / (double) K;
+ while (V < p)
+ {
+ if (k > K)
+ {
+ /* need all the rest */
+ return blkno + 1;
+ }
+
+ /* skip */
+ blkno++;
+ K--; /* keep K == N - t */
+
+ /* adjust p to be new cutoff point in reduced range */
+ p *= 1.0 - (double) k / (double) K;
+ }
+
+ return blkno + 1;
+ }
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers