On Wed, Jan 25, 2012 at 9:47 PM, Noah Misch <n...@leadboat.com> wrote:
>
> With all that done, run some quick benchmarks: see how "SELECT free_percent
> FROM pgstattuple(rel)" fares compared to "SELECT relation_free_space(rel)" for
> a large heap and for a large B-tree index.  If the timing difference is too
> small to be interesting to you, remove relation_free_space() and submit your
> pgstattuple() improvements alone.  Otherwise, submit as written.
>

Ok. I split this in three patches.

1) pgstattuple-gin_spgist.patch
This first patch adds gin and spgist support to pgstattuple, also
makes pgstattuple use a ring buffer when reading tables or indexes.

2) pgstattuple-relation_free_space.patch
This patch adds the relation_free_space function to pgstattuple.

the function relation_free_space() is faster than pgstattuple(), to
test that i initialize pgbench with a scale of 40.
In that context pgstattuple() tooks 1.4s to process pgbench_account
table and relation_free_space() tooks 730ms (half the time!)
In the index the difference is less notorious, 170ms the former and
150ms the latter.

3) pgstattuple-stats_target.patch
This patch adds a stats_target parameter to the relation_free_space()
function, it mimics the way analyze choose the blocks to read and is
faster than plain relation_free_space() but of course could be inexact
if the pages that we don't read are the ones with more free space

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index beff1b9..9f2ec1f 100644
*** a/contrib/pgstattuple/pgstatindex.c
--- b/contrib/pgstattuple/pgstatindex.c
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 95,100 ****
--- 95,101 ----
  	BlockNumber nblocks;
  	BlockNumber blkno;
  	BTIndexStat indexStat;
+  	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
  
  	if (!superuser())
  		ereport(ERROR,
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 122,128 ****
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBuffer(rel, 0);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
--- 123,129 ----
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
*************** pgstatindex(PG_FUNCTION_ARGS)
*** 159,165 ****
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
! 		buffer = ReadBuffer(rel, blkno);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
--- 160,166 ----
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
!  		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index e5ddd87..6bbc957 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
***************
*** 24,33 ****
--- 24,35 ----
  
  #include "postgres.h"
  
+ #include "access/gin_private.h"
  #include "access/gist_private.h"
  #include "access/hash.h"
  #include "access/nbtree.h"
  #include "access/relscan.h"
+ #include "access/spgist_private.h"
  #include "catalog/namespace.h"
  #include "funcapi.h"
  #include "miscadmin.h"
*************** static void pgstat_hash_page(pgstattuple
*** 73,83 ****
--- 75,96 ----
  				 Relation rel, BlockNumber blkno);
  static void pgstat_gist_page(pgstattuple_type *stat,
  				 Relation rel, BlockNumber blkno);
+ static void pgstat_gin_page(pgstattuple_type *stat,
+ 				 Relation rel, BlockNumber blkno);
+ static void pgstat_spgist_page(pgstattuple_type *stat,
+ 				 Relation rel, BlockNumber blkno);
  static Datum pgstat_index(Relation rel, BlockNumber start,
  			 pgstat_page pagefn, FunctionCallInfo fcinfo);
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
  				  OffsetNumber minoff, OffsetNumber maxoff);
  
+ /* 
+  * Buffer access strategy for reading relations, it's simpler to keep it
+  * global because pgstat_*_page() functions read one buffer at a time.
+  * pgstat_heap() and pgstat_index() should initialize it before use.
+  */
+ BufferAccessStrategy bstrategy;
+ 
  /*
   * build_pgstattuple_type -- build a pgstattuple_type tuple
   */
*************** pgstat_relation(Relation rel, FunctionCa
*** 229,235 ****
  					return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
  										pgstat_gist_page, fcinfo);
  				case GIN_AM_OID:
! 					err = "gin index";
  					break;
  				default:
  					err = "unknown index";
--- 242,253 ----
  					return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
  										pgstat_gist_page, fcinfo);
  				case GIN_AM_OID:
! 					return pgstat_index(rel, GIN_METAPAGE_BLKNO + 1,
! 										pgstat_gin_page, fcinfo);
! 					break;
! 				case SPGIST_AM_OID:
! 					return pgstat_index(rel, SPGIST_METAPAGE_BLKNO + 1,
! 										pgstat_spgist_page, fcinfo);
  					break;
  				default:
  					err = "unknown index";
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 276,281 ****
--- 294,302 ----
  
  	nblocks = scan->rs_nblocks; /* # blocks to be scanned */
  
+ 	/* prepare access strategy for this table */
+ 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 
  	/* scan the relation */
  	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
  	{
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 309,315 ****
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			buffer = ReadBuffer(rel, block);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
  			stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  			UnlockReleaseBuffer(buffer);
--- 330,336 ----
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
  			stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  			UnlockReleaseBuffer(buffer);
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 322,328 ****
  	{
  		CHECK_FOR_INTERRUPTS();
  
! 		buffer = ReadBuffer(rel, block);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  		stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  		UnlockReleaseBuffer(buffer);
--- 343,349 ----
  	{
  		CHECK_FOR_INTERRUPTS();
  
! 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  		stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  		UnlockReleaseBuffer(buffer);
*************** pgstat_btree_page(pgstattuple_type *stat
*** 345,351 ****
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBuffer(rel, blkno);
  	LockBuffer(buf, BT_READ);
  	page = BufferGetPage(buf);
  
--- 366,372 ----
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  	LockBuffer(buf, BT_READ);
  	page = BufferGetPage(buf);
  
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 389,395 ****
  	Page		page;
  
  	_hash_getlock(rel, blkno, HASH_SHARE);
! 	buf = _hash_getbuf(rel, blkno, HASH_READ, 0);
  	page = BufferGetPage(buf);
  
  	if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData)))
--- 410,416 ----
  	Page		page;
  
  	_hash_getlock(rel, blkno, HASH_SHARE);
! 	buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy);
  	page = BufferGetPage(buf);
  
  	if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData)))
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 431,437 ****
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBuffer(rel, blkno);
  	LockBuffer(buf, GIST_SHARE);
  	gistcheckpage(rel, buf);
  	page = BufferGetPage(buf);
--- 452,458 ----
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  	LockBuffer(buf, GIST_SHARE);
  	gistcheckpage(rel, buf);
  	page = BufferGetPage(buf);
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 450,455 ****
--- 471,538 ----
  }
  
  /*
+  * pgstat_gin_page -- check tuples in a gin page
+  */
+ static void
+ pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
+ {
+ 	Buffer		buf;
+ 	Page		page;
+ 
+ 	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 	LockBuffer(buf, GIN_SHARE);
+ 	page = BufferGetPage(buf);
+ 
+ 	if (GinPageIsDeleted(page))
+ 	{
+ 		/* recyclable page */
+ 		stat->free_space += BLCKSZ; 
+ 	}
+ 	else if (GinPageIsLeaf(page) || GinPageIsData(page))
+ 	{
+ 		pgstat_index_page(stat, page, FirstOffsetNumber,
+ 						  PageGetMaxOffsetNumber(page));
+ 	}
+ 	else
+ 	{
+ 		/* root or node */
+ 	}
+ 
+ 	UnlockReleaseBuffer(buf);
+ }
+ 
+ /*
+  * pgstat_spgist_page -- check tuples in a spgist page
+  */
+ static void
+ pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
+ {
+ 	Buffer		buf;
+ 	Page		page;
+ 
+ 	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 	LockBuffer(buf, BUFFER_LOCK_SHARE);
+ 	page = BufferGetPage(buf);
+ 
+ 	if (SpGistPageIsDeleted(page))
+ 	{
+ 		/* recyclable page */
+ 	    stat->free_space += BLCKSZ;
+ 	}
+ 	else if (SpGistPageIsLeaf(page))
+ 	{
+ 		pgstat_index_page(stat, page, FirstOffsetNumber,
+ 						  PageGetMaxOffsetNumber(page));
+ 	}
+ 	else
+ 	{
+ 		/* root or node */
+ 	}
+ 
+ 	UnlockReleaseBuffer(buf);
+ }
+ 
+ /*
   * pgstat_index -- returns live/dead tuples info in a generic index
   */
  static Datum
*************** pgstat_index(Relation rel, BlockNumber s
*** 460,465 ****
--- 543,551 ----
  	BlockNumber blkno;
  	pgstattuple_type stat = {0};
  
+ 	/* prepare access strategy for this index */
+ 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 
  	blkno = start;
  	for (;;)
  	{
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 6ac2775..75c4405 100644
*** a/contrib/pgstattuple/Makefile
--- b/contrib/pgstattuple/Makefile
*************** MODULE_big	= pgstattuple
*** 4,10 ****
  OBJS		= pgstattuple.o pgstatindex.o
  
  EXTENSION = pgstattuple
! DATA = pgstattuple--1.0.sql pgstattuple--unpackaged--1.0.sql
  
  REGRESS = pgstattuple
  
--- 4,13 ----
  OBJS		= pgstattuple.o pgstatindex.o
  
  EXTENSION = pgstattuple
! DATA = pgstattuple--1.0.sql pgstattuple--1.1.sql \
!        pgstattuple--1.0--1.1.sql \
!        pgstattuple--unpackaged--1.0.sql 
! 		
  
  REGRESS = pgstattuple
  
diff --git a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
index ...d7ad6ca .
*** a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql
***************
*** 0 ****
--- 1,12 ----
+ /* contrib/pgstattuple/pgstattuple--1.0--1.1.sql */
+ 
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.1'" to load this file. \quit
+ 
+ --
+ -- relation_free_space()
+ --
+ CREATE FUNCTION relation_free_space(IN relname text)
+ RETURNS real
+ AS 'MODULE_PATHNAME', 'relation_free_space'
+ LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql
index ...d94c20f .
*** a/contrib/pgstattuple/pgstattuple--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.1.sql
***************
*** 0 ****
--- 1,57 ----
+ /* contrib/pgstattuple/pgstattuple--1.0.sql */
+ 
+ -- complain if script is sourced in psql, rather than via CREATE EXTENSION
+ \echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit
+ 
+ CREATE FUNCTION pgstattuple(IN relname text,
+     OUT table_len BIGINT,		-- physical table length in bytes
+     OUT tuple_count BIGINT,		-- number of live tuples
+     OUT tuple_len BIGINT,		-- total tuples length in bytes
+     OUT tuple_percent FLOAT8,		-- live tuples in %
+     OUT dead_tuple_count BIGINT,	-- number of dead tuples
+     OUT dead_tuple_len BIGINT,		-- total dead tuples length in bytes
+     OUT dead_tuple_percent FLOAT8,	-- dead tuples in %
+     OUT free_space BIGINT,		-- free space in bytes
+     OUT free_percent FLOAT8)		-- free space in %
+ AS 'MODULE_PATHNAME', 'pgstattuple'
+ LANGUAGE C STRICT;
+ 
+ CREATE FUNCTION pgstattuple(IN reloid oid,
+     OUT table_len BIGINT,		-- physical table length in bytes
+     OUT tuple_count BIGINT,		-- number of live tuples
+     OUT tuple_len BIGINT,		-- total tuples length in bytes
+     OUT tuple_percent FLOAT8,		-- live tuples in %
+     OUT dead_tuple_count BIGINT,	-- number of dead tuples
+     OUT dead_tuple_len BIGINT,		-- total dead tuples length in bytes
+     OUT dead_tuple_percent FLOAT8,	-- dead tuples in %
+     OUT free_space BIGINT,		-- free space in bytes
+     OUT free_percent FLOAT8)		-- free space in %
+ AS 'MODULE_PATHNAME', 'pgstattuplebyid'
+ LANGUAGE C STRICT;
+ 
+ CREATE FUNCTION pgstatindex(IN relname text,
+     OUT version INT,
+     OUT tree_level INT,
+     OUT index_size BIGINT,
+     OUT root_block_no BIGINT,
+     OUT internal_pages BIGINT,
+     OUT leaf_pages BIGINT,
+     OUT empty_pages BIGINT,
+     OUT deleted_pages BIGINT,
+     OUT avg_leaf_density FLOAT8,
+     OUT leaf_fragmentation FLOAT8)
+ AS 'MODULE_PATHNAME', 'pgstatindex'
+ LANGUAGE C STRICT;
+ 
+ CREATE FUNCTION pg_relpages(IN relname text)
+ RETURNS BIGINT
+ AS 'MODULE_PATHNAME', 'pg_relpages'
+ LANGUAGE C STRICT;
+ 
+ --
+ -- relation_free_space()
+ --
+ CREATE FUNCTION relation_free_space(IN relname text)
+ RETURNS real
+ AS 'MODULE_PATHNAME', 'relation_free_space'
+ LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 89ec4da..1001035 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
*************** PG_MODULE_MAGIC;
*** 43,51 ****
--- 43,53 ----
  
  PG_FUNCTION_INFO_V1(pgstattuple);
  PG_FUNCTION_INFO_V1(pgstattuplebyid);
+ PG_FUNCTION_INFO_V1(relation_free_space);
  
  extern Datum pgstattuple(PG_FUNCTION_ARGS);
  extern Datum pgstattuplebyid(PG_FUNCTION_ARGS);
+ extern Datum relation_free_space(PG_FUNCTION_ARGS);
  
  /*
   * struct pgstattuple_type
*************** typedef struct pgstattuple_type
*** 63,93 ****
  	uint64		free_space;		/* free/reusable space in bytes */
  } pgstattuple_type;
  
! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber);
  
  static Datum build_pgstattuple_type(pgstattuple_type *stat,
  					   FunctionCallInfo fcinfo);
  static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo);
  static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo);
  static void pgstat_btree_page(pgstattuple_type *stat,
! 				  Relation rel, BlockNumber blkno);
  static void pgstat_hash_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno);
  static void pgstat_gist_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno);
  static void pgstat_gin_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno);
  static void pgstat_spgist_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno);
  static Datum pgstat_index(Relation rel, BlockNumber start,
  			 pgstat_page pagefn, FunctionCallInfo fcinfo);
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
! 				  OffsetNumber minoff, OffsetNumber maxoff);
  
  /* 
   * Buffer access strategy for reading relations, it's simpler to keep it
   * global because pgstat_*_page() functions read one buffer at a time.
!  * pgstat_heap() and pgstat_index() should initialize it before use.
   */
  BufferAccessStrategy bstrategy;
  
--- 65,98 ----
  	uint64		free_space;		/* free/reusable space in bytes */
  } pgstattuple_type;
  
! typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber, bool);
  
  static Datum build_pgstattuple_type(pgstattuple_type *stat,
  					   FunctionCallInfo fcinfo);
  static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo);
  static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo);
  static void pgstat_btree_page(pgstattuple_type *stat,
! 				  Relation rel, BlockNumber blkno, bool enable_counters);
  static void pgstat_hash_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno, bool enable_counters);
  static void pgstat_gist_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno, bool enable_counters);
  static void pgstat_gin_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno, bool enable_counters);
  static void pgstat_spgist_page(pgstattuple_type *stat,
! 				 Relation rel, BlockNumber blkno, bool enable_counters);
  static Datum pgstat_index(Relation rel, BlockNumber start,
  			 pgstat_page pagefn, FunctionCallInfo fcinfo);
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
! 				  OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
! 
! static float4 GetHeapRelationFreeSpace(Relation rel);
! static float4 GetIndexRelationFreeSpace(Relation rel);
  
  /* 
   * Buffer access strategy for reading relations, it's simpler to keep it
   * global because pgstat_*_page() functions read one buffer at a time.
!  * every consumer of bstrategy should initialize it before use.
   */
  BufferAccessStrategy bstrategy;
  
*************** pgstat_heap(Relation rel, FunctionCallIn
*** 362,368 ****
   * pgstat_btree_page -- check tuples in a btree page
   */
  static void
! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
  {
  	Buffer		buf;
  	Page		page;
--- 367,373 ----
   * pgstat_btree_page -- check tuples in a btree page
   */
  static void
! pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
  {
  	Buffer		buf;
  	Page		page;
*************** pgstat_btree_page(pgstattuple_type *stat
*** 390,396 ****
  		else if (P_ISLEAF(opaque))
  		{
  			pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque),
! 							  PageGetMaxOffsetNumber(page));
  		}
  		else
  		{
--- 395,401 ----
  		else if (P_ISLEAF(opaque))
  		{
  			pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque),
! 							  PageGetMaxOffsetNumber(page), enable_counters);
  		}
  		else
  		{
*************** pgstat_btree_page(pgstattuple_type *stat
*** 405,411 ****
   * pgstat_hash_page -- check tuples in a hash page
   */
  static void
! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
  {
  	Buffer		buf;
  	Page		page;
--- 410,416 ----
   * pgstat_hash_page -- check tuples in a hash page
   */
  static void
! pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
  {
  	Buffer		buf;
  	Page		page;
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 427,433 ****
  			case LH_BUCKET_PAGE:
  			case LH_OVERFLOW_PAGE:
  				pgstat_index_page(stat, page, FirstOffsetNumber,
! 								  PageGetMaxOffsetNumber(page));
  				break;
  			case LH_BITMAP_PAGE:
  			case LH_META_PAGE:
--- 432,438 ----
  			case LH_BUCKET_PAGE:
  			case LH_OVERFLOW_PAGE:
  				pgstat_index_page(stat, page, FirstOffsetNumber,
! 								  PageGetMaxOffsetNumber(page), enable_counters);
  				break;
  			case LH_BITMAP_PAGE:
  			case LH_META_PAGE:
*************** pgstat_hash_page(pgstattuple_type *stat,
*** 448,454 ****
   * pgstat_gist_page -- check tuples in a gist page
   */
  static void
! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
  {
  	Buffer		buf;
  	Page		page;
--- 453,459 ----
   * pgstat_gist_page -- check tuples in a gist page
   */
  static void
! pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
  {
  	Buffer		buf;
  	Page		page;
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 461,467 ****
  	if (GistPageIsLeaf(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page));
  	}
  	else
  	{
--- 466,472 ----
  	if (GistPageIsLeaf(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page), enable_counters);
  	}
  	else
  	{
*************** pgstat_gist_page(pgstattuple_type *stat,
*** 475,481 ****
   * pgstat_gin_page -- check tuples in a gin page
   */
  static void
! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
  {
  	Buffer		buf;
  	Page		page;
--- 480,486 ----
   * pgstat_gin_page -- check tuples in a gin page
   */
  static void
! pgstat_gin_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
  {
  	Buffer		buf;
  	Page		page;
*************** pgstat_gin_page(pgstattuple_type *stat,
*** 492,498 ****
  	else if (GinPageIsLeaf(page) || GinPageIsData(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page));
  	}
  	else
  	{
--- 497,503 ----
  	else if (GinPageIsLeaf(page) || GinPageIsData(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page), enable_counters);
  	}
  	else
  	{
*************** pgstat_gin_page(pgstattuple_type *stat,
*** 506,512 ****
   * pgstat_spgist_page -- check tuples in a spgist page
   */
  static void
! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno)
  {
  	Buffer		buf;
  	Page		page;
--- 511,517 ----
   * pgstat_spgist_page -- check tuples in a spgist page
   */
  static void
! pgstat_spgist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, bool enable_counters)
  {
  	Buffer		buf;
  	Page		page;
*************** pgstat_spgist_page(pgstattuple_type *sta
*** 523,529 ****
  	else if (SpGistPageIsLeaf(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page));
  	}
  	else
  	{
--- 528,534 ----
  	else if (SpGistPageIsLeaf(page))
  	{
  		pgstat_index_page(stat, page, FirstOffsetNumber,
! 						  PageGetMaxOffsetNumber(page), enable_counters);
  	}
  	else
  	{
*************** pgstat_index(Relation rel, BlockNumber s
*** 567,573 ****
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			pagefn(&stat, rel, blkno);
  		}
  	}
  
--- 572,578 ----
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			pagefn(&stat, rel, blkno, true);
  		}
  	}
  
*************** pgstat_index(Relation rel, BlockNumber s
*** 581,605 ****
   */
  static void
  pgstat_index_page(pgstattuple_type *stat, Page page,
! 				  OffsetNumber minoff, OffsetNumber maxoff)
  {
  	OffsetNumber i;
  
  	stat->free_space += PageGetFreeSpace(page);
  
! 	for (i = minoff; i <= maxoff; i = OffsetNumberNext(i))
  	{
! 		ItemId		itemid = PageGetItemId(page, i);
  
! 		if (ItemIdIsDead(itemid))
  		{
! 			stat->dead_tuple_count++;
! 			stat->dead_tuple_len += ItemIdGetLength(itemid);
  		}
! 		else
  		{
! 			stat->tuple_count++;
! 			stat->tuple_len += ItemIdGetLength(itemid);
  		}
  	}
  }
--- 586,808 ----
   */
  static void
  pgstat_index_page(pgstattuple_type *stat, Page page,
! 				  OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters)
  {
  	OffsetNumber i;
  
  	stat->free_space += PageGetFreeSpace(page);
  
! 	if (enable_counters)
  	{
! 		for (i = minoff; i <= maxoff; i = OffsetNumberNext(i))
! 		{
! 			ItemId		itemid = PageGetItemId(page, i);
! 	
! 			if (ItemIdIsDead(itemid))
! 			{
! 				stat->dead_tuple_count++;
! 				stat->dead_tuple_len += ItemIdGetLength(itemid);
! 			}
! 			else
! 			{
! 				stat->tuple_count++;
! 				stat->tuple_len += ItemIdGetLength(itemid);
! 			}
! 		}
! 	}
! }
  
! /*
!  * relation_free_space
!  *
!  * Returns the percentage of free space for a given relation.
!  */
! Datum
! relation_free_space(PG_FUNCTION_ARGS)
! {
! 	text       *relname = PG_GETARG_TEXT_P(0);
! 	RangeVar	*relrv;
! 	Relation	rel;
! 	float4 		free_space = 0;
! 
! 	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
! 	rel = relation_openrv(relrv, AccessShareLock);
! 
! 	/* Only allow tables or indexes */
! 	if (rel->rd_rel->relkind != RELKIND_INDEX && 
! 		rel->rd_rel->relkind != RELKIND_RELATION &&
! 		rel->rd_rel->relkind != RELKIND_TOASTVALUE)
! 		ereport(ERROR,
! 			(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 				errmsg("cannot get free space from object \"%s\"",
! 					RelationGetRelationName(rel))));
! 
! 	/*
! 	 * Reject attempts to read non-local temporary relations.  We would be
! 	 * likely to get wrong data, since we have no visibility into the owning
! 	 * session's local buffers.
! 	 */
! 	if (RELATION_IS_OTHER_TEMP(rel))
! 		ereport(ERROR,
! 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
! 				errmsg("cannot access temporary tables of other sessions")));
! 
! 	switch (rel->rd_rel->relkind)
! 	{
! 		case RELKIND_RELATION: 
! 		case RELKIND_TOASTVALUE: 
! 			free_space = GetHeapRelationFreeSpace(rel);
! 			break;
! 		case RELKIND_INDEX: 
! 			free_space = GetIndexRelationFreeSpace(rel);
! 			break;
! 		default:
! 			/* we are not supposed to get here */
! 			ereport(ERROR,
! 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
! 					errmsg("cannot get free space from object \"%s\"",
! 						RelationGetRelationName(rel))));
! 	}
! 	relation_close(rel, AccessShareLock);
! 
! 	PG_RETURN_FLOAT4(free_space);
! }
! 
! /*
!  * GetHeapRelationFreeSpace()
!  *
!  * Get the free space for a heap relation.
!  * This is a helper function for relation_free_space()
!  */
! static float4
! GetHeapRelationFreeSpace(Relation rel)
! {
! 	Buffer      buffer;
! 	Page		page;
! 	BlockNumber blkno = 0;
! 	BlockNumber nblocks; 
! 	Size        free_space = 0;
! 	double		free_percent = 0;
! 
! 	/* prepare access strategy for reading the table */
! 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
! 
! 	for (;;)
! 	{
! 		/* Get the current relation length */
! 		LockRelationForExtension(rel, ExclusiveLock);
! 		nblocks = RelationGetNumberOfBlocks(rel);
! 		UnlockRelationForExtension(rel, ExclusiveLock);
! 
!         /* Quit if we've scanned the whole relation */
! 		if (blkno >= nblocks)
  		{
! 			break;
  		}
! 
! 		for (; blkno < nblocks; blkno++)
  		{
! 			CHECK_FOR_INTERRUPTS();
! 
! 			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
! 			LockBuffer(buffer, BUFFER_LOCK_SHARE);
! 
! 			page   = BufferGetPage(buffer);
! 			free_space += PageGetHeapFreeSpace(page);
! 
! 			UnlockReleaseBuffer(buffer);
  		}
  	}
+ 
+ 	if (nblocks > 0)
+ 		free_percent = ((float4) free_space) / (nblocks * BLCKSZ);
+ 
+ 	return free_percent;
+ }
+ 
+ /*------------------------------------------------
+  * GetIndexRelationFreeSpace
+  *
+  * Look the type of the index and call the appropiate
+  * function to get the free space
+  *------------------------------------------------
+  */
+ static float4
+ GetIndexRelationFreeSpace(Relation rel)
+ {
+ 	BlockNumber nblocks;
+ 	BlockNumber blkno;
+     pgstattuple_type stat = {0};
+ 	double		free_percent = 0;
+ 
+ 	/* prepare access strategy for reading the index */
+ 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 
+ 	/* start at block */
+ 	switch (rel->rd_rel->relam)
+ 	{
+ 		case BTREE_AM_OID:
+ 			blkno = BTREE_METAPAGE + 1;
+ 			break;
+ 		case HASH_AM_OID:
+ 			blkno = HASH_METAPAGE + 1;
+ 			break;
+ 		case GIST_AM_OID:
+ 			blkno = GIST_ROOT_BLKNO + 1;
+ 			break;
+ 		case GIN_AM_OID:
+ 			blkno = GIN_METAPAGE_BLKNO + 1;
+ 			break;
+ 		case SPGIST_AM_OID:
+ 			blkno = SPGIST_METAPAGE_BLKNO + 1;
+ 			break;
+ 		default:
+ 			blkno = 0;
+ 	}
+ 
+     for (;;)
+     {
+         /* Get the current relation length */
+         LockRelationForExtension(rel, ExclusiveLock);
+         nblocks = RelationGetNumberOfBlocks(rel);
+         UnlockRelationForExtension(rel, ExclusiveLock);
+ 
+         /* Quit if we've scanned the whole relation */
+ 		if (blkno >= nblocks)
+ 		{
+ 			break;
+ 		}
+ 
+         for (; blkno < nblocks; blkno++)
+         {
+             CHECK_FOR_INTERRUPTS();
+ 
+             switch (rel->rd_rel->relam)
+             {
+                 case BTREE_AM_OID:
+ 					pgstat_btree_page(&stat, rel, blkno, false);
+                     break;
+                 case HASH_AM_OID:
+ 					pgstat_hash_page(&stat, rel, blkno, false);
+                     break;
+                 case GIST_AM_OID:
+ 					pgstat_gist_page(&stat, rel, blkno, false);
+                     break;
+                 case GIN_AM_OID:
+ 					pgstat_gin_page(&stat, rel, blkno, false);
+                     break;
+                 case SPGIST_AM_OID:
+ 					pgstat_spgist_page(&stat, rel, blkno, false);
+                     break;
+                 default:
+ 					elog(ERROR, "Unknown index type %d", rel->rd_rel->relam);
+                     break;
+             }
+         }
+     }
+ 
+ 	if (nblocks > 0)
+ 		free_percent = ((float4) stat.free_space) / (nblocks * BLCKSZ);
+ 
+ 	return free_percent;
  }
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index 7b5129b..fcfd36f 100644
*** a/contrib/pgstattuple/pgstattuple.control
--- b/contrib/pgstattuple/pgstattuple.control
***************
*** 1,5 ****
  # pgstattuple extension
  comment = 'show tuple-level statistics'
! default_version = '1.0'
  module_pathname = '$libdir/pgstattuple'
  relocatable = true
--- 1,5 ----
  # pgstattuple extension
  comment = 'show tuple-level statistics'
! default_version = '1.1'
  module_pathname = '$libdir/pgstattuple'
  relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index a55b35c..4e99a54 100644
*** a/doc/src/sgml/pgstattuple.sgml
--- b/doc/src/sgml/pgstattuple.sgml
*************** leaf_fragmentation | 0
*** 256,261 ****
--- 256,277 ----
       </para>
      </listitem>
     </varlistentry>
+ 
+    <varlistentry>
+     <term>
+      <function>relation_free_space(relname text) returns float</function>
+     </term>
+ 
+     <listitem>
+      <para>
+       <function>relation_free_space</function> returns a factor that represents 
+       the total amount of free space in a table or index.  The entire relation 
+       is scanned in order to compute this amount, which may generate a large 
+       amount of disk reads.
+      </para>
+     </listitem>
+    </varlistentry>
+ 
    </variablelist>
   </sect2>
  
diff --git a/contrib/pgstattuple/pgstattuple--1.1.sql b/contrib/pgstattuple/pgstattuple--1.1.sql
index d94c20f..08507c1 100644
*** a/contrib/pgstattuple/pgstattuple--1.1.sql
--- b/contrib/pgstattuple/pgstattuple--1.1.sql
*************** LANGUAGE C STRICT;
*** 51,57 ****
  --
  -- relation_free_space()
  --
! CREATE FUNCTION relation_free_space(IN relname text)
  RETURNS real
  AS 'MODULE_PATHNAME', 'relation_free_space'
  LANGUAGE C STRICT;
--- 51,57 ----
  --
  -- relation_free_space()
  --
! CREATE FUNCTION relation_free_space(IN relname text, IN stats_target int DEFAULT 100)
  RETURNS real
  AS 'MODULE_PATHNAME', 'relation_free_space'
  LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index 1001035..8649b6a 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
*************** static Datum pgstat_index(Relation rel,
*** 86,93 ****
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
  				  OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
  
! static float4 GetHeapRelationFreeSpace(Relation rel);
! static float4 GetIndexRelationFreeSpace(Relation rel);
  
  /* 
   * Buffer access strategy for reading relations, it's simpler to keep it
--- 86,94 ----
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
  				  OffsetNumber minoff, OffsetNumber maxoff, bool enable_counters);
  
! static float4 GetHeapRelationFreeSpace(Relation rel, int32 stats_target);
! static float4 GetIndexRelationFreeSpace(Relation rel, int32 stats_target);
! static BlockNumber get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target);
  
  /* 
   * Buffer access strategy for reading relations, it's simpler to keep it
*************** Datum
*** 621,630 ****
--- 622,637 ----
  relation_free_space(PG_FUNCTION_ARGS)
  {
  	text       *relname = PG_GETARG_TEXT_P(0);
+ 	int32		stats_target = PG_GETARG_INT32(1);
  	RangeVar	*relrv;
  	Relation	rel;
  	float4 		free_space = 0;
  
+ 	if (stats_target < 1 || stats_target > 100)
+ 		ereport(ERROR,
+ 			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ 				errmsg("parameter stats_target should be between 1 and 100")));
+ 
  	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
  	rel = relation_openrv(relrv, AccessShareLock);
  
*************** relation_free_space(PG_FUNCTION_ARGS)
*** 651,660 ****
  	{
  		case RELKIND_RELATION: 
  		case RELKIND_TOASTVALUE: 
! 			free_space = GetHeapRelationFreeSpace(rel);
  			break;
  		case RELKIND_INDEX: 
! 			free_space = GetIndexRelationFreeSpace(rel);
  			break;
  		default:
  			/* we are not supposed to get here */
--- 658,667 ----
  	{
  		case RELKIND_RELATION: 
  		case RELKIND_TOASTVALUE: 
! 			free_space = GetHeapRelationFreeSpace(rel, stats_target);
  			break;
  		case RELKIND_INDEX: 
! 			free_space = GetIndexRelationFreeSpace(rel, stats_target);
  			break;
  		default:
  			/* we are not supposed to get here */
*************** relation_free_space(PG_FUNCTION_ARGS)
*** 675,686 ****
   * This is a helper function for relation_free_space()
   */
  static float4
! GetHeapRelationFreeSpace(Relation rel)
  {
  	Buffer      buffer;
  	Page		page;
  	BlockNumber blkno = 0;
  	BlockNumber nblocks; 
  	Size        free_space = 0;
  	double		free_percent = 0;
  
--- 682,695 ----
   * This is a helper function for relation_free_space()
   */
  static float4
! GetHeapRelationFreeSpace(Relation rel, int32 stats_target)
  {
  	Buffer      buffer;
  	Page		page;
  	BlockNumber blkno = 0;
  	BlockNumber nblocks; 
+  	BlockNumber totalBlocksInRelation; 
+ 	int			blocks_read = 0;
  	Size        free_space = 0;
  	double		free_percent = 0;
  
*************** GetHeapRelationFreeSpace(Relation rel)
*** 691,716 ****
  	{
  		/* Get the current relation length */
  		LockRelationForExtension(rel, ExclusiveLock);
! 		nblocks = RelationGetNumberOfBlocks(rel);
  		UnlockRelationForExtension(rel, ExclusiveLock);
  
          /* Quit if we've scanned the whole relation */
  		if (blkno >= nblocks)
  		{
  			break;
  		}
  
! 		for (; blkno < nblocks; blkno++)
  		{
  			CHECK_FOR_INTERRUPTS();
  
  			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  			page   = BufferGetPage(buffer);
  			free_space += PageGetHeapFreeSpace(page);
  
  			UnlockReleaseBuffer(buffer);
  		}
  	}
  
--- 700,733 ----
  	{
  		/* Get the current relation length */
  		LockRelationForExtension(rel, ExclusiveLock);
! 		totalBlocksInRelation = RelationGetNumberOfBlocks(rel);
  		UnlockRelationForExtension(rel, ExclusiveLock);
  
+ 		nblocks = totalBlocksInRelation * stats_target / 100;
+ 
          /* Quit if we've scanned the whole relation */
  		if (blkno >= nblocks)
  		{
  			break;
  		}
  
! 		for (; blkno < nblocks; )
  		{
  			CHECK_FOR_INTERRUPTS();
  
  			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ 			blocks_read++;
  
  			page   = BufferGetPage(buffer);
  			free_space += PageGetHeapFreeSpace(page);
  
  			UnlockReleaseBuffer(buffer);
+ 
+ 			if (stats_target == 100)
+ 				blkno++;
+ 			else
+ 				blkno = get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target);
  		}
  	}
  
*************** GetHeapRelationFreeSpace(Relation rel)
*** 728,737 ****
   *------------------------------------------------
   */
  static float4
! GetIndexRelationFreeSpace(Relation rel)
  {
- 	BlockNumber nblocks;
  	BlockNumber blkno;
      pgstattuple_type stat = {0};
  	double		free_percent = 0;
  
--- 745,756 ----
   *------------------------------------------------
   */
  static float4
! GetIndexRelationFreeSpace(Relation rel, int32 stats_target)
  {
  	BlockNumber blkno;
+ 	BlockNumber nblocks;
+ 	BlockNumber totalBlocksInRelation; 
+ 	int 		blocks_read = 0;
      pgstattuple_type stat = {0};
  	double		free_percent = 0;
  
*************** GetIndexRelationFreeSpace(Relation rel)
*** 764,779 ****
      {
          /* Get the current relation length */
          LockRelationForExtension(rel, ExclusiveLock);
!         nblocks = RelationGetNumberOfBlocks(rel);
          UnlockRelationForExtension(rel, ExclusiveLock);
  
          /* Quit if we've scanned the whole relation */
  		if (blkno >= nblocks)
  		{
  			break;
  		}
  
!         for (; blkno < nblocks; blkno++)
          {
              CHECK_FOR_INTERRUPTS();
  
--- 783,800 ----
      {
          /* Get the current relation length */
          LockRelationForExtension(rel, ExclusiveLock);
! 		totalBlocksInRelation = RelationGetNumberOfBlocks(rel);
          UnlockRelationForExtension(rel, ExclusiveLock);
  
+ 		nblocks = totalBlocksInRelation * stats_target / 100;
+ 
          /* Quit if we've scanned the whole relation */
  		if (blkno >= nblocks)
  		{
  			break;
  		}
  
!         for (; blkno < nblocks; )
          {
              CHECK_FOR_INTERRUPTS();
  
*************** GetIndexRelationFreeSpace(Relation rel)
*** 798,803 ****
--- 819,830 ----
  					elog(ERROR, "Unknown index type %d", rel->rd_rel->relam);
                      break;
              }
+ 			blocks_read++;
+ 
+ 			if (stats_target == 100)
+ 				blkno++;
+ 			else
+ 				get_next_block(blkno, totalBlocksInRelation, blocks_read, stats_target);
          }
      }
  
*************** GetIndexRelationFreeSpace(Relation rel)
*** 806,808 ****
--- 833,870 ----
  
  	return free_percent;
  }
+ 
+ static BlockNumber
+ get_next_block(BlockNumber blkno, BlockNumber nblocks, int blocks_read, int32 stats_target)
+ {
+ 	BlockNumber K = nblocks - blkno;								/* remaining blocks */
+ 	int         k = (nblocks * stats_target / 100) - blocks_read;	/* blocks still to sample */
+ 	double      p;              									/* probability to skip block */
+ 	double      V; 
+ 
+ 	if (k > K)
+ 	{	
+ 		/* need all the rest */
+ 		return blkno + 1;
+ 	}
+ 
+ 	V = ((double) random() + 1) / ((double) MAX_RANDOM_VALUE + 2);
+     p = 1.0 - (double) k / (double) K;
+     while (V < p)
+     {
+ 		if (k > K)
+ 		{	
+ 			/* need all the rest */
+ 			return blkno + 1;
+ 		}
+ 
+         /* skip */
+         blkno++;
+         K--;                    /* keep K == N - t */
+ 
+         /* adjust p to be new cutoff point in reduced range */
+         p *= 1.0 - (double) k / (double) K;
+     }
+ 
+ 	return blkno + 1;
+ }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to