On Wed, Jan 25, 2012 at 9:47 PM, Noah Misch <n...@leadboat.com> wrote: > > With all that done, run some quick benchmarks: see how "SELECT free_percent > FROM pgstattuple(rel)" fares compared to "SELECT relation_free_space(rel)" for > a large heap and for a large B-tree index. If the timing difference is too > small to be interesting to you, remove relation_free_space() and submit your > pgstattuple() improvements alone. Otherwise, submit as written. >
Ok. I split this in three patches. 1) pgstattuple-gin_spgist.patch This first patch adds gin and spgist support to pgstattuple, also makes pgstattuple use a ring buffer when reading tables or indexes. 2) pgstattuple-relation_free_space.patch This patch adds the relation_free_space function to pgstattuple. the function relation_free_space() is faster than pgstattuple(), to test that i initialize pgbench with a scale of 40. In that context pgstattuple() tooks 1.4s to process pgbench_account table and relation_free_space() tooks 730ms (half the time!) In the index the difference is less notorious, 170ms the former and 150ms the latter. 3) pgstattuple-stats_target.patch This patch adds a stats_target parameter to the relation_free_space() function, it mimics the way analyze choose the blocks to read and is faster than plain relation_free_space() but of course could be inexact if the pages that we don't read are the ones with more free space -- Jaime Casanova www.2ndQuadrant.com Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index beff1b9..9f2ec1f 100644 *** a/contrib/pgstattuple/pgstatindex.c --- b/contrib/pgstattuple/pgstatindex.c *************** pgstatindex(PG_FUNCTION_ARGS) *** 95,100 **** --- 95,101 ---- BlockNumber nblocks; BlockNumber blkno; BTIndexStat indexStat; + BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); if (!superuser()) ereport(ERROR, *************** pgstatindex(PG_FUNCTION_ARGS) *** 122,128 **** * Read metapage */ { ! Buffer buffer = ReadBuffer(rel, 0); Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); --- 123,129 ---- * Read metapage */ { ! Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy); Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); *************** pgstatindex(PG_FUNCTION_ARGS) *** 159,165 **** CHECK_FOR_INTERRUPTS(); /* Read and lock buffer */ ! buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); --- 160,166 ---- CHECK_FOR_INTERRUPTS(); /* Read and lock buffer */ ! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index e5ddd87..6bbc957 100644 *** a/contrib/pgstattuple/pgstattuple.c --- b/contrib/pgstattuple/pgstattuple.c *************** *** 24,33 **** --- 24,35 ---- #include "postgres.h" + #include "access/gin_private.h" #include "access/gist_private.h" #include "access/hash.h" #include "access/nbtree.h" #include "access/relscan.h" + #include "access/spgist_private.h" #include "catalog/namespace.h" #include "funcapi.h" #include "miscadmin.h" *************** static void pgstat_hash_page(pgstattuple *** 73,83 **** --- 75,96 ---- Relation rel, BlockNumber blkno); static void pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno); + static void pgstat_gin_page(pgstattuple_type *stat, + Relation rel, BlockNumber blkno); + static void pgstat_spgist_page(pgstattuple_type *stat, + Relation rel, BlockNumber blkno); static Datum pgstat_index(Relation rel, BlockNumber start, pgstat_page pagefn, FunctionCallInfo fcinfo); static void pgstat_index_page(pgstattuple_type *stat, Page page, OffsetNumber minoff, OffsetNumber maxoff); + /* + * Buffer access strategy for reading relations, it's simpler to keep it + * global because pgstat_*_page() functions read one buffer at a time. + * pgstat_heap() and pgstat_index() should initialize it before use. + */ + BufferAccessStrategy bstrategy; + /* * build_pgstattuple_type -- build a pgstattuple_type tuple */ *************** pgstat_relation(Relation rel, FunctionCa *** 229,235 **** return pgstat_index(rel, GIST_ROOT_BLKNO + 1, pgstat_gist_page, fcinfo); case GIN_AM_OID: ! err = "gin index"; break; default: err = "unknown index"; --- 242,253 ---- return pgstat_index(rel, GIST_ROOT_BLKNO + 1, pgstat_gist_page, fcinfo); case GIN_AM_OID: ! return pgstat_index(rel, GIN_METAPAGE_BLKNO + 1, ! pgstat_gin_page, fcinfo); ! break; ! case SPGIST_AM_OID: ! return pgstat_index(rel, SPGIST_METAPAGE_BLKNO + 1, ! pgstat_spgist_page, fcinfo); break; default: err = "unknown index"; *************** pgstat_heap(Relation rel, FunctionCallIn *** 276,281 **** --- 294,302 ---- nblocks = scan->rs_nblocks; /* # blocks to be scanned */ + /* prepare access strategy for this table */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + /* scan the relation */ while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { *************** pgstat_heap(Relation rel, FunctionCallIn *** 309,315 **** { CHECK_FOR_INTERRUPTS(); ! buffer = ReadBuffer(rel, block); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); --- 330,336 ---- { CHECK_FOR_INTERRUPTS(); ! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); *************** pgstat_heap(Relation rel, FunctionCallIn *** 322,328 **** { CHECK_FOR_INTERRUPTS(); ! buffer = ReadBuffer(rel, block); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); --- 343,349 ---- { CHECK_FOR_INTERRUPTS(); ! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); UnlockReleaseBuffer(buffer); *************** pgstat_btree_page(pgstattuple_type *stat *** 345,351 **** Buffer buf; Page page; ! buf = ReadBuffer(rel, blkno); LockBuffer(buf, BT_READ); page = BufferGetPage(buf); --- 366,372 ---- Buffer buf; Page page; ! buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buf, BT_READ); page = BufferGetPage(buf); *************** pgstat_hash_page(pgstattuple_type *stat, *** 389,395 **** Page page; _hash_getlock(rel, blkno, HASH_SHARE); ! buf = _hash_getbuf(rel, blkno, HASH_READ, 0); page = BufferGetPage(buf); if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData))) --- 410,416 ---- Page page; _hash_getlock(rel, blkno, HASH_SHARE); ! buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy); page = BufferGetPage(buf); if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData))) *************** pgstat_gist_page(pgstattuple_type *stat, *** 431,437 **** Buffer buf; Page page; ! buf = ReadBuffer(rel, blkno); LockBuffer(buf, GIST_SHARE); gistcheckpage(rel, buf); page = BufferGetPage(buf); --- 452,458 ---- Buffer buf; Page page; ! buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buf, GIST_SHARE); gistcheckpage(rel, buf); page = BufferGetPage(buf); *************** pgstat_gist_page(pgstattuple_type *stat, *** 450,455 **** --- 471,538 ---- } /* + * pgstat_gin_page -- check tuples in a gin page + */ + static void + pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) + { + Buffer buf; + Page page; + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + LockBuffer(buf, GIN_SHARE); + page = BufferGetPage(buf); + + if (GinPageIsDeleted(page)) + { + /* recyclable page */ + stat->free_space += BLCKSZ; + } + else if (GinPageIsLeaf(page) || GinPageIsData(page)) + { + pgstat_index_page(stat, page, FirstOffsetNumber, + PageGetMaxOffsetNumber(page)); + } + else + { + /* root or node */ + } + + UnlockReleaseBuffer(buf); + } + + /* + * pgstat_spgist_page -- check tuples in a spgist page + */ + static void + pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) + { + Buffer buf; + Page page; + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + if (SpGistPageIsDeleted(page)) + { + /* recyclable page */ + stat->free_space += BLCKSZ; + } + else if (SpGistPageIsLeaf(page)) + { + pgstat_index_page(stat, page, FirstOffsetNumber, + PageGetMaxOffsetNumber(page)); + } + else + { + /* root or node */ + } + + UnlockReleaseBuffer(buf); + } + + /* * pgstat_index -- returns live/dead tuples info in a generic index */ static Datum *************** pgstat_index(Relation rel, BlockNumber s *** 460,465 **** --- 543,551 ---- BlockNumber blkno; pgstattuple_type stat = {0}; + /* prepare access strategy for this index */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + blkno = start; for (;;) {
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile index 6ac2775..75c4405 100644 *** a/contrib/pgstattuple/Makefile --- b/contrib/pgstattuple/Makefile *************** MODULE_big = pgstattuple *** 4,10 **** OBJS = pgstattuple.o pgstatindex.o EXTENSION = pgstattuple ! DATA = pgstattuple--1.0.sql pgstattuple--unpackaged--1.0.sql REGRESS = pgstattuple --- 4,13 ---- OBJS = pgstattuple.o pgstatindex.o EXTENSION = pgstattuple ! DATA = pgstattuple--1.0.sql pgstattuple--1.1.sql \ ! pgstattuple--1.0--1.1.sql \ ! pgstattuple--unpackaged--1.0.sql ! REGRESS = pgstattuple diff --git a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql index ...d7ad6ca . *** a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql --- b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql *************** *** 0 **** --- 1,12 ---- + /* contrib/pgstattuple/pgstattuple--1.0--1.1.sql */ + + -- complain if script is sourced in psql, rather than via CREATE EXTENSION + \echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.1'" to load this file. \quit + + -- + -- relation_free_space() + -- + CREATE FUNCTION relation_free_space(IN relname text) + RETURNS real + AS 'MODULE_PATHNAME', 'relation_free_space' + LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql index ...d94c20f . *** a/contrib/pgstattuple/pgstattuple--1.1.sql --- b/contrib/pgstattuple/pgstattuple--1.1.sql *************** *** 0 **** --- 1,57 ---- + /* contrib/pgstattuple/pgstattuple--1.0.sql */ + + -- complain if script is sourced in psql, rather than via CREATE EXTENSION + \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit + + CREATE FUNCTION pgstattuple(IN relname text, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % + AS 'MODULE_PATHNAME', 'pgstattuple' + LANGUAGE C STRICT; + + CREATE FUNCTION pgstattuple(IN reloid oid, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % + AS 'MODULE_PATHNAME', 'pgstattuplebyid' + LANGUAGE C STRICT; + + CREATE FUNCTION pgstatindex(IN relname text, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) + AS 'MODULE_PATHNAME', 'pgstatindex' + LANGUAGE C STRICT; + + CREATE FUNCTION pg_relpages(IN relname text) + RETURNS BIGINT + AS 'MODULE_PATHNAME', 'pg_relpages' + LANGUAGE C STRICT; + + -- + -- relation_free_space() + -- + CREATE FUNCTION relation_free_space(IN relname text) + RETURNS real + AS 'MODULE_PATHNAME', 'relation_free_space' + LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 89ec4da..1001035 100644 *** a/contrib/pgstattuple/pgstattuple.c --- b/contrib/pgstattuple/pgstattuple.c *************** PG_MODULE_MAGIC; *** 43,51 **** --- 43,53 ---- PG_FUNCTION_INFO_V1(pgstattuple); PG_FUNCTION_INFO_V1(pgstattuplebyid); + PG_FUNCTION_INFO_V1(relation_free_space); extern Datum pgstattuple(PG_FUNCTION_ARGS); extern Datum pgstattuplebyid(PG_FUNCTION_ARGS); + extern Datum relation_free_space(PG_FUNCTION_ARGS); /* * struct pgstattuple_type *************** typedef struct pgstattuple_type *** 63,93 **** uint64 free_space; /* free/reusable space in bytes */ } pgstattuple_type; ! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber); static Datum build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo); static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo); static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo); static void pgstat_btree_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno); static void pgstat_hash_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno); static void pgstat_gist_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno); static void pgstat_gin_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno); static void pgstat_spgist_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno); static Datum pgstat_index(Relation rel, BlockNumber start, pgstat_page pagefn, FunctionCallInfo fcinfo); static void pgstat_index_page(pgstattuple_type *stat, Page page, ! OffsetNumber minoff, OffsetNumber maxoff); /* * Buffer access strategy for reading relations, it's simpler to keep it * global because pgstat_*_page() functions read one buffer at a time. ! * pgstat_heap() and pgstat_index() should initialize it before use. */ BufferAccessStrategy bstrategy; --- 65,98 ---- uint64 free_space; /* free/reusable space in bytes */ } pgstattuple_type; ! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber, bool); static Datum build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo); static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo); static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo); static void pgstat_btree_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno, bool enable_counters); static void pgstat_hash_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno, bool enable_counters); static void pgstat_gist_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno, bool enable_counters); static void pgstat_gin_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno, bool enable_counters); static void pgstat_spgist_page(pgstattuple_type *stat, ! Relation rel, BlockNumber blkno, bool enable_counters); static Datum pgstat_index(Relation rel, BlockNumber start, pgstat_page pagefn, FunctionCallInfo fcinfo); static void pgstat_index_page(pgstattuple_type *stat, Page page, ! OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters); ! ! static float4 GetHeapRelationFreeSpace(Relation rel); ! static float4 GetIndexRelationFreeSpace(Relation rel); /* * Buffer access strategy for reading relations, it's simpler to keep it * global because pgstat_*_page() functions read one buffer at a time. ! * every consumer of bstrategy should initialize it before use. */ BufferAccessStrategy bstrategy; *************** pgstat_heap(Relation rel, FunctionCallIn *** 362,368 **** * pgstat_btree_page -- check tuples in a btree page */ static void ! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) { Buffer buf; Page page; --- 367,373 ---- * pgstat_btree_page -- check tuples in a btree page */ static void ! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters) { Buffer buf; Page page; *************** pgstat_btree_page(pgstattuple_type *stat *** 390,396 **** else if (P_ISLEAF(opaque)) { pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque), ! PageGetMaxOffsetNumber(page)); } else { --- 395,401 ---- else if (P_ISLEAF(opaque)) { pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque), ! PageGetMaxOffsetNumber(page), enable_counters); } else { *************** pgstat_btree_page(pgstattuple_type *stat *** 405,411 **** * pgstat_hash_page -- check tuples in a hash page */ static void ! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) { Buffer buf; Page page; --- 410,416 ---- * pgstat_hash_page -- check tuples in a hash page */ static void ! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters) { Buffer buf; Page page; *************** pgstat_hash_page(pgstattuple_type *stat, *** 427,433 **** case LH_BUCKET_PAGE: case LH_OVERFLOW_PAGE: pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page)); break; case LH_BITMAP_PAGE: case LH_META_PAGE: --- 432,438 ---- case LH_BUCKET_PAGE: case LH_OVERFLOW_PAGE: pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page), enable_counters); break; case LH_BITMAP_PAGE: case LH_META_PAGE: *************** pgstat_hash_page(pgstattuple_type *stat, *** 448,454 **** * pgstat_gist_page -- check tuples in a gist page */ static void ! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) { Buffer buf; Page page; --- 453,459 ---- * pgstat_gist_page -- check tuples in a gist page */ static void ! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters) { Buffer buf; Page page; *************** pgstat_gist_page(pgstattuple_type *stat, *** 461,467 **** if (GistPageIsLeaf(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page)); } else { --- 466,472 ---- if (GistPageIsLeaf(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page), enable_counters); } else { *************** pgstat_gist_page(pgstattuple_type *stat, *** 475,481 **** * pgstat_gin_page -- check tuples in a gin page */ static void ! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) { Buffer buf; Page page; --- 480,486 ---- * pgstat_gin_page -- check tuples in a gin page */ static void ! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters) { Buffer buf; Page page; *************** pgstat_gin_page(pgstattuple_type *stat, *** 492,498 **** else if (GinPageIsLeaf(page) || GinPageIsData(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page)); } else { --- 497,503 ---- else if (GinPageIsLeaf(page) || GinPageIsData(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page), enable_counters); } else { *************** pgstat_gin_page(pgstattuple_type *stat, *** 506,512 **** * pgstat_spgist_page -- check tuples in a spgist page */ static void ! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno) { Buffer buf; Page page; --- 511,517 ---- * pgstat_spgist_page -- check tuples in a spgist page */ static void ! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters) { Buffer buf; Page page; *************** pgstat_spgist_page(pgstattuple_type *sta *** 523,529 **** else if (SpGistPageIsLeaf(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page)); } else { --- 528,534 ---- else if (SpGistPageIsLeaf(page)) { pgstat_index_page(stat, page, FirstOffsetNumber, ! PageGetMaxOffsetNumber(page), enable_counters); } else { *************** pgstat_index(Relation rel, BlockNumber s *** 567,573 **** { CHECK_FOR_INTERRUPTS(); ! pagefn(&stat, rel, blkno); } } --- 572,578 ---- { CHECK_FOR_INTERRUPTS(); ! pagefn(&stat, rel, blkno, true); } } *************** pgstat_index(Relation rel, BlockNumber s *** 581,605 **** */ static void pgstat_index_page(pgstattuple_type *stat, Page page, ! OffsetNumber minoff, OffsetNumber maxoff) { OffsetNumber i; stat->free_space += PageGetFreeSpace(page); ! for (i = minoff; i <= maxoff; i = OffsetNumberNext(i)) { ! ItemId itemid = PageGetItemId(page, i); ! if (ItemIdIsDead(itemid)) { ! stat->dead_tuple_count++; ! stat->dead_tuple_len += ItemIdGetLength(itemid); } ! else { ! stat->tuple_count++; ! stat->tuple_len += ItemIdGetLength(itemid); } } } --- 586,808 ---- */ static void pgstat_index_page(pgstattuple_type *stat, Page page, ! OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters) { OffsetNumber i; stat->free_space += PageGetFreeSpace(page); ! if (enable_counters) { ! for (i = minoff; i <= maxoff; i = OffsetNumberNext(i)) ! { ! ItemId itemid = PageGetItemId(page, i); ! ! if (ItemIdIsDead(itemid)) ! { ! stat->dead_tuple_count++; ! stat->dead_tuple_len += ItemIdGetLength(itemid); ! } ! else ! { ! stat->tuple_count++; ! stat->tuple_len += ItemIdGetLength(itemid); ! } ! } ! } ! } ! /* ! * relation_free_space ! * ! * Returns the percentage of free space for a given relation. ! */ ! Datum ! relation_free_space(PG_FUNCTION_ARGS) ! { ! text *relname = PG_GETARG_TEXT_P(0); ! RangeVar *relrv; ! Relation rel; ! float4 free_space = 0; ! ! relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); ! rel = relation_openrv(relrv, AccessShareLock); ! ! /* Only allow tables or indexes */ ! if (rel->rd_rel->relkind != RELKIND_INDEX && ! rel->rd_rel->relkind != RELKIND_RELATION && ! rel->rd_rel->relkind != RELKIND_TOASTVALUE) ! ereport(ERROR, ! (errcode(ERRCODE_WRONG_OBJECT_TYPE), ! errmsg("cannot get free space from object \"%s\"", ! RelationGetRelationName(rel)))); ! ! /* ! * Reject attempts to read non-local temporary relations. We would be ! * likely to get wrong data, since we have no visibility into the owning ! * session's local buffers. ! */ ! if (RELATION_IS_OTHER_TEMP(rel)) ! ereport(ERROR, ! (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), ! errmsg("cannot access temporary tables of other sessions"))); ! ! switch (rel->rd_rel->relkind) ! { ! case RELKIND_RELATION: ! case RELKIND_TOASTVALUE: ! free_space = GetHeapRelationFreeSpace(rel); ! break; ! case RELKIND_INDEX: ! free_space = GetIndexRelationFreeSpace(rel); ! break; ! default: ! /* we are not supposed to get here */ ! ereport(ERROR, ! (errcode(ERRCODE_WRONG_OBJECT_TYPE), ! errmsg("cannot get free space from object \"%s\"", ! RelationGetRelationName(rel)))); ! } ! relation_close(rel, AccessShareLock); ! ! PG_RETURN_FLOAT4(free_space); ! } ! ! /* ! * GetHeapRelationFreeSpace() ! * ! * Get the free space for a heap relation. ! * This is a helper function for relation_free_space() ! */ ! static float4 ! GetHeapRelationFreeSpace(Relation rel) ! { ! Buffer buffer; ! Page page; ! BlockNumber blkno = 0; ! BlockNumber nblocks; ! Size free_space = 0; ! double free_percent = 0; ! ! /* prepare access strategy for reading the table */ ! bstrategy = GetAccessStrategy(BAS_BULKREAD); ! ! for (;;) ! { ! /* Get the current relation length */ ! LockRelationForExtension(rel, ExclusiveLock); ! nblocks = RelationGetNumberOfBlocks(rel); ! UnlockRelationForExtension(rel, ExclusiveLock); ! ! /* Quit if we've scanned the whole relation */ ! if (blkno >= nblocks) { ! break; } ! ! for (; blkno < nblocks; blkno++) { ! CHECK_FOR_INTERRUPTS(); ! ! buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); ! LockBuffer(buffer, BUFFER_LOCK_SHARE); ! ! page = BufferGetPage(buffer); ! free_space += PageGetHeapFreeSpace(page); ! ! UnlockReleaseBuffer(buffer); } } + + if (nblocks > 0) + free_percent = ((float4) free_space) / (nblocks * BLCKSZ); + + return free_percent; + } + + /*------------------------------------------------ + * GetIndexRelationFreeSpace + * + * Look the type of the index and call the appropiate + * function to get the free space + *------------------------------------------------ + */ + static float4 + GetIndexRelationFreeSpace(Relation rel) + { + BlockNumber nblocks; + BlockNumber blkno; + pgstattuple_type stat = {0}; + double free_percent = 0; + + /* prepare access strategy for reading the index */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + /* start at block */ + switch (rel->rd_rel->relam) + { + case BTREE_AM_OID: + blkno = BTREE_METAPAGE + 1; + break; + case HASH_AM_OID: + blkno = HASH_METAPAGE + 1; + break; + case GIST_AM_OID: + blkno = GIST_ROOT_BLKNO + 1; + break; + case GIN_AM_OID: + blkno = GIN_METAPAGE_BLKNO + 1; + break; + case SPGIST_AM_OID: + blkno = SPGIST_METAPAGE_BLKNO + 1; + break; + default: + blkno = 0; + } + + for (;;) + { + /* Get the current relation length */ + LockRelationForExtension(rel, ExclusiveLock); + nblocks = RelationGetNumberOfBlocks(rel); + UnlockRelationForExtension(rel, ExclusiveLock); + + /* Quit if we've scanned the whole relation */ + if (blkno >= nblocks) + { + break; + } + + for (; blkno < nblocks; blkno++) + { + CHECK_FOR_INTERRUPTS(); + + switch (rel->rd_rel->relam) + { + case BTREE_AM_OID: + pgstat_btree_page(&stat, rel, blkno, false); + break; + case HASH_AM_OID: + pgstat_hash_page(&stat, rel, blkno, false); + break; + case GIST_AM_OID: + pgstat_gist_page(&stat, rel, blkno, false); + break; + case GIN_AM_OID: + pgstat_gin_page(&stat, rel, blkno, false); + break; + case SPGIST_AM_OID: + pgstat_spgist_page(&stat, rel, blkno, false); + break; + default: + elog(ERROR, "Unknown index type %d", rel->rd_rel->relam); + break; + } + } + } + + if (nblocks > 0) + free_percent = ((float4) stat.free_space) / (nblocks * BLCKSZ); + + return free_percent; } diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control index 7b5129b..fcfd36f 100644 *** a/contrib/pgstattuple/pgstattuple.control --- b/contrib/pgstattuple/pgstattuple.control *************** *** 1,5 **** # pgstattuple extension comment = 'show tuple-level statistics' ! default_version = '1.0' module_pathname = '$libdir/pgstattuple' relocatable = true --- 1,5 ---- # pgstattuple extension comment = 'show tuple-level statistics' ! default_version = '1.1' module_pathname = '$libdir/pgstattuple' relocatable = true diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml index a55b35c..4e99a54 100644 *** a/doc/src/sgml/pgstattuple.sgml --- b/doc/src/sgml/pgstattuple.sgml *************** leaf_fragmentation | 0 *** 256,261 **** --- 256,277 ---- </para> </listitem> </varlistentry> + + <varlistentry> + <term> + <function>relation_free_space(relname text) returns float</function> + </term> + + <listitem> + <para> + <function>relation_free_space</function> returns a factor that represents + the total amount of free space in a table or index. The entire relation + is scanned in order to compute this amount, which may generate a large + amount of disk reads. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2>
diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql index d94c20f..08507c1 100644 *** a/contrib/pgstattuple/pgstattuple--1.1.sql --- b/contrib/pgstattuple/pgstattuple--1.1.sql *************** LANGUAGE C STRICT; *** 51,57 **** -- -- relation_free_space() -- ! CREATE FUNCTION relation_free_space(IN relname text) RETURNS real AS 'MODULE_PATHNAME', 'relation_free_space' LANGUAGE C STRICT; --- 51,57 ---- -- -- relation_free_space() -- ! CREATE FUNCTION relation_free_space(IN relname text, IN stats_target int DEFAULT 100) RETURNS real AS 'MODULE_PATHNAME', 'relation_free_space' LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c index 1001035..8649b6a 100644 *** a/contrib/pgstattuple/pgstattuple.c --- b/contrib/pgstattuple/pgstattuple.c *************** static Datum pgstat_index(Relation rel, *** 86,93 **** static void pgstat_index_page(pgstattuple_type *stat, Page page, OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters); ! static float4 GetHeapRelationFreeSpace(Relation rel); ! static float4 GetIndexRelationFreeSpace(Relation rel); /* * Buffer access strategy for reading relations, it's simpler to keep it --- 86,94 ---- static void pgstat_index_page(pgstattuple_type *stat, Page page, OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters); ! static float4 GetHeapRelationFreeSpace(Relation rel, int32 stats_target); ! static float4 GetIndexRelationFreeSpace(Relation rel, int32 stats_target); ! static BlockNumber get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target); /* * Buffer access strategy for reading relations, it's simpler to keep it *************** Datum *** 621,630 **** --- 622,637 ---- relation_free_space(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); + int32 stats_target = PG_GETARG_INT32(1); RangeVar *relrv; Relation rel; float4 free_space = 0; + if (stats_target < 1 || stats_target > 100) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("parameter stats_target should be between 1 and 100"))); + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); *************** relation_free_space(PG_FUNCTION_ARGS) *** 651,660 **** { case RELKIND_RELATION: case RELKIND_TOASTVALUE: ! free_space = GetHeapRelationFreeSpace(rel); break; case RELKIND_INDEX: ! free_space = GetIndexRelationFreeSpace(rel); break; default: /* we are not supposed to get here */ --- 658,667 ---- { case RELKIND_RELATION: case RELKIND_TOASTVALUE: ! free_space = GetHeapRelationFreeSpace(rel, stats_target); break; case RELKIND_INDEX: ! free_space = GetIndexRelationFreeSpace(rel, stats_target); break; default: /* we are not supposed to get here */ *************** relation_free_space(PG_FUNCTION_ARGS) *** 675,686 **** * This is a helper function for relation_free_space() */ static float4 ! GetHeapRelationFreeSpace(Relation rel) { Buffer buffer; Page page; BlockNumber blkno = 0; BlockNumber nblocks; Size free_space = 0; double free_percent = 0; --- 682,695 ---- * This is a helper function for relation_free_space() */ static float4 ! GetHeapRelationFreeSpace(Relation rel, int32 stats_target) { Buffer buffer; Page page; BlockNumber blkno = 0; BlockNumber nblocks; + BlockNumber totalBlocksInRelation; + int blocks_read = 0; Size free_space = 0; double free_percent = 0; *************** GetHeapRelationFreeSpace(Relation rel) *** 691,716 **** { /* Get the current relation length */ LockRelationForExtension(rel, ExclusiveLock); ! nblocks = RelationGetNumberOfBlocks(rel); UnlockRelationForExtension(rel, ExclusiveLock); /* Quit if we've scanned the whole relation */ if (blkno >= nblocks) { break; } ! for (; blkno < nblocks; blkno++) { CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); free_space += PageGetHeapFreeSpace(page); UnlockReleaseBuffer(buffer); } } --- 700,733 ---- { /* Get the current relation length */ LockRelationForExtension(rel, ExclusiveLock); ! totalBlocksInRelation = RelationGetNumberOfBlocks(rel); UnlockRelationForExtension(rel, ExclusiveLock); + nblocks = totalBlocksInRelation * stats_target / 100; + /* Quit if we've scanned the whole relation */ if (blkno >= nblocks) { break; } ! for (; blkno < nblocks; ) { CHECK_FOR_INTERRUPTS(); buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); + blocks_read++; page = BufferGetPage(buffer); free_space += PageGetHeapFreeSpace(page); UnlockReleaseBuffer(buffer); + + if (stats_target == 100) + blkno++; + else + blkno = get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target); } } *************** GetHeapRelationFreeSpace(Relation rel) *** 728,737 **** *------------------------------------------------ */ static float4 ! GetIndexRelationFreeSpace(Relation rel) { - BlockNumber nblocks; BlockNumber blkno; pgstattuple_type stat = {0}; double free_percent = 0; --- 745,756 ---- *------------------------------------------------ */ static float4 ! GetIndexRelationFreeSpace(Relation rel, int32 stats_target) { BlockNumber blkno; + BlockNumber nblocks; + BlockNumber totalBlocksInRelation; + int blocks_read = 0; pgstattuple_type stat = {0}; double free_percent = 0; *************** GetIndexRelationFreeSpace(Relation rel) *** 764,779 **** { /* Get the current relation length */ LockRelationForExtension(rel, ExclusiveLock); ! nblocks = RelationGetNumberOfBlocks(rel); UnlockRelationForExtension(rel, ExclusiveLock); /* Quit if we've scanned the whole relation */ if (blkno >= nblocks) { break; } ! for (; blkno < nblocks; blkno++) { CHECK_FOR_INTERRUPTS(); --- 783,800 ---- { /* Get the current relation length */ LockRelationForExtension(rel, ExclusiveLock); ! totalBlocksInRelation = RelationGetNumberOfBlocks(rel); UnlockRelationForExtension(rel, ExclusiveLock); + nblocks = totalBlocksInRelation * stats_target / 100; + /* Quit if we've scanned the whole relation */ if (blkno >= nblocks) { break; } ! for (; blkno < nblocks; ) { CHECK_FOR_INTERRUPTS(); *************** GetIndexRelationFreeSpace(Relation rel) *** 798,803 **** --- 819,830 ---- elog(ERROR, "Unknown index type %d", rel->rd_rel->relam); break; } + blocks_read++; + + if (stats_target == 100) + blkno++; + else + get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target); } } *************** GetIndexRelationFreeSpace(Relation rel) *** 806,808 **** --- 833,870 ---- return free_percent; } + + static BlockNumber + get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target) + { + BlockNumber K = nblocks - blkno; /* remaining blocks */ + int k = (nblocks * stats_target / 100) - blocks_read; /* blocks still to sample */ + double p; /* probability to skip block */ + double V; + + if (k > K) + { + /* need all the rest */ + return blkno + 1; + } + + V = ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2); + p = 1.0 - (double) k / (double) K; + while (V < p) + { + if (k > K) + { + /* need all the rest */ + return blkno + 1; + } + + /* skip */ + blkno++; + K--; /* keep K == N - t */ + + /* adjust p to be new cutoff point in reduced range */ + p *= 1.0 - (double) k / (double) K; + } + + return blkno + 1; + }
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers