Re: [HACKERS] Measuring relation free space

2012-03-13 Thread Robert Haas
On Mon, Mar 12, 2012 at 11:10 PM, Jaime Casanova ja...@2ndquadrant.com wrote:
 On Mon, Mar 12, 2012 at 9:41 PM, Noah Misch n...@leadboat.com wrote:

 I created a CF entry for this and marked it Ready for Committer.

 i wasn't sure if create an entry this late was a good idea or not...
 but now i feel better because is less probable that it will fall out
 on the cracks, thanks

 You left the
 bstrategy variable non-static, but that didn't seem important enough to
 justify another round trip.


 ah! i forgot that...

I committed this, but I didn't like the global variable, so I adjusted
it to pass bstrategy as a parameter where needed.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-03-12 Thread Noah Misch
On Fri, Mar 09, 2012 at 02:18:02AM -0500, Jaime Casanova wrote:
 On Wed, Feb 22, 2012 at 12:27 AM, Noah Misch n...@leadboat.com wrote:
  On Tue, Feb 14, 2012 at 02:04:26AM -0500, Jaime Casanova wrote:
 
  1) pgstattuple-gin_spgist.patch
  This first patch adds gin and spgist support to pgstattuple, also
  makes pgstattuple use a ring buffer when reading tables or indexes.
 
  The buffer access strategy usage bits look fine to commit.
 
 
 ok. i extracted that part. which basically makes pgstattuple usable in
 production (i mean, by not bloating shared buffers when using the
 function)

I created a CF entry for this and marked it Ready for Committer.  You left the
bstrategy variable non-static, but that didn't seem important enough to
justify another round trip.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-03-12 Thread Jaime Casanova
On Mon, Mar 12, 2012 at 9:41 PM, Noah Misch n...@leadboat.com wrote:

 I created a CF entry for this and marked it Ready for Committer.

i wasn't sure if create an entry this late was a good idea or not...
but now i feel better because is less probable that it will fall out
on the cracks, thanks

 You left the
 bstrategy variable non-static, but that didn't seem important enough to
 justify another round trip.


ah! i forgot that...

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-03-08 Thread Jaime Casanova
On Wed, Feb 22, 2012 at 12:27 AM, Noah Misch n...@leadboat.com wrote:
 On Tue, Feb 14, 2012 at 02:04:26AM -0500, Jaime Casanova wrote:

 1) pgstattuple-gin_spgist.patch
 This first patch adds gin and spgist support to pgstattuple, also
 makes pgstattuple use a ring buffer when reading tables or indexes.

 The buffer access strategy usage bits look fine to commit.


ok. i extracted that part. which basically makes pgstattuple usable in
production (i mean, by not bloating shared buffers when using the
function)

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
new file mode 100644
index beff1b9..9f2ec1f
*** a/contrib/pgstattuple/pgstatindex.c
--- b/contrib/pgstattuple/pgstatindex.c
*** pgstatindex(PG_FUNCTION_ARGS)
*** 95,100 
--- 95,101 
  	BlockNumber nblocks;
  	BlockNumber blkno;
  	BTIndexStat indexStat;
+  	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
  
  	if (!superuser())
  		ereport(ERROR,
*** pgstatindex(PG_FUNCTION_ARGS)
*** 122,128 
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBuffer(rel, 0);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
--- 123,129 
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
*** pgstatindex(PG_FUNCTION_ARGS)
*** 159,165 
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
! 		buffer = ReadBuffer(rel, blkno);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
--- 160,166 
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
!  		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
new file mode 100644
index e5ddd87..580e24e
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
*** static Datum pgstat_index(Relation rel,
*** 78,83 
--- 78,90 
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
    OffsetNumber minoff, OffsetNumber maxoff);
  
+ /* 
+  * Buffer access strategy for reading relations, it's simpler to keep it
+  * global because pgstat_*_page() functions read one buffer at a time.
+  * pgstat_heap() and pgstat_index() should initialize it before use.
+  */
+ BufferAccessStrategy bstrategy;
+ 
  /*
   * build_pgstattuple_type -- build a pgstattuple_type tuple
   */
*** pgstat_relation(Relation rel, FunctionCa
*** 231,236 
--- 238,246 
  case GIN_AM_OID:
  	err = gin index;
  	break;
+ case SPGIST_AM_OID:
+ 	err = spgist index;
+ 	break;
  default:
  	err = unknown index;
  	break;
*** pgstat_heap(Relation rel, FunctionCallIn
*** 276,281 
--- 286,295 
  
  	nblocks = scan-rs_nblocks; /* # blocks to be scanned */
  
+ 	/* prepare access strategy for this table */
+ 	bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 	scan-rs_strategy = bstrategy;
+ 
  	/* scan the relation */
  	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
  	{
*** pgstat_heap(Relation rel, FunctionCallIn
*** 309,315 
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			buffer = ReadBuffer(rel, block);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
  			stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  			UnlockReleaseBuffer(buffer);
--- 323,329 
  		{
  			CHECK_FOR_INTERRUPTS();
  
! 			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
  			LockBuffer(buffer, BUFFER_LOCK_SHARE);
  			stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  			UnlockReleaseBuffer(buffer);
*** pgstat_heap(Relation rel, FunctionCallIn
*** 322,328 
  	{
  		CHECK_FOR_INTERRUPTS();
  
! 		buffer = ReadBuffer(rel, block);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  		stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  		UnlockReleaseBuffer(buffer);
--- 336,342 
  	{
  		CHECK_FOR_INTERRUPTS();
  
! 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, RBM_NORMAL, bstrategy);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  		stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
  		UnlockReleaseBuffer(buffer);
*** pgstat_btree_page(pgstattuple_type *stat
*** 345,351 
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBuffer(rel, blkno);
  	LockBuffer(buf, BT_READ);
  	page = BufferGetPage(buf);
  
--- 359,365 
  	Buffer		buf;
  	Page		page;
  
! 	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  	LockBuffer(buf, BT_READ);
  	page = 

Re: [HACKERS] Measuring relation free space

2012-02-21 Thread Noah Misch
On Tue, Feb 14, 2012 at 02:04:26AM -0500, Jaime Casanova wrote:
 On Wed, Jan 25, 2012 at 9:47 PM, Noah Misch n...@leadboat.com wrote:
 
  With all that done, run some quick benchmarks: see how SELECT free_percent
  FROM pgstattuple(rel) fares compared to SELECT relation_free_space(rel) 
  for
  a large heap and for a large B-tree index. ?If the timing difference is too
  small to be interesting to you, remove relation_free_space() and submit your
  pgstattuple() improvements alone. ?Otherwise, submit as written.
 
 
 Ok. I split this in three patches.
 
 1) pgstattuple-gin_spgist.patch
 This first patch adds gin and spgist support to pgstattuple, also
 makes pgstattuple use a ring buffer when reading tables or indexes.

The buffer access strategy usage bits look fine to commit.  The gin and spgist
support has problems, detailed below.

 2) pgstattuple-relation_free_space.patch
 This patch adds the relation_free_space function to pgstattuple.
 
 the function relation_free_space() is faster than pgstattuple(), to
 test that i initialize pgbench with a scale of 40.
 In that context pgstattuple() tooks 1.4s to process pgbench_account
 table and relation_free_space() tooks 730ms (half the time!)
 In the index the difference is less notorious, 170ms the former and
 150ms the latter.

Benchmarks lasting on the order of one second are far too short.  I tried the
first two patches on this 6914 MiB table and 4284 MiB index:

create table t(n) as select * from generate_series(1,2);
create index on t(n);

This machine has about 1 GiB of memory available for disk cache, and I used
shared_buffers = 128MB.  I used a regular assert-enabled build with
debug_assertions = off.  Timings:

pgstattuple.free_percent, heap: runtime 166.2s; answer 0.34
pgstattuple.free_percent, index:runtime 110.9s; answer 9.83
relation_free_space, heap:  runtime 165.1s; answer 
0.00838721
relation_free_space, index: runtime 108.7s; answer 2.23692

Note the disagreement in answers and the nonsensical answer from the last
test.  The numbers do line up for smaller tables and indexes that I tried.

During the pgstattuple() runs on the heap, CPU usage divided evenly between
user and iowait time.  With relation_free_space(), iowait kept above 80%.  For
the index, pgstattuple() managed 60-80% iowait and relation_free_space() again
kept above 80%.  Even so, that did not produce any significant change in
runtime.  I'm guessing that readahead was highly effective here, so the I/O
bound dictated elapsed time.

Bottom line, this patch can probably achieve 50% speedups on already-in-memory
relations.  It can reduce the contribution to CPU load, but not the elapsed
runtime, for relations we largely pull from disk.  Do those benefits justify
the additional user-visible interface?  I suppose the sort of installation
that would benefit most is one just short of the tipping point of the database
size exceeding memory size.  Larger databases could not afford either
function, and smaller databases don't need to watch bloat so closely.
Offhand, I think that the I/O savings of sampling will be the real win, and
it's not worth an extra user-visible function to get the CPU usage savings
this patch offers.  Other opinions welcome.

 3) pgstattuple-stats_target.patch
 This patch adds a stats_target parameter to the relation_free_space()
 function, it mimics the way analyze choose the blocks to read and is
 faster than plain relation_free_space() but of course could be inexact
 if the pages that we don't read are the ones with more free space

This part is a fresh submission.  It is simple enough that I have reviewed it.
It gives the expected speedup.  However, the results are wrong:

3 runs of pgstattuple('t', 5):  0.000171412, 0.000171876, 0.000169326
3 runs of pgstattuple('t', 10): 0.000336525, 0.000344404, 0.000341314

I can get an apparent infinite loop:

create table t0(n) as select * from generate_series(1,400);
create index on t0(n);

[local] test=# select relation_free_space('t0_n_idx', 100);
 relation_free_space
-
   0.0982675

Time: 133.788 ms

[local] test=# select relation_free_space('t0_n_idx', 5);
Cancel request sent
ERROR:  canceling statement due to user request
Time: 24655.481 ms


As a way forward, I suggest abandoning relation_free_space() entirely and
adding a sampling capability to pgstattuple().  There are clear gains to be
had from that method.  The gains of splitting out the free percent calculation
from the other pgstattuple() calculations seem meager.

If you would like, submit the buffer strategy bits as a separate patch with
its own CF entry, noting that it arose from here.  That part can be Ready for
Committer.  I'm marking the original CF entry Returned with Feedback.


Patch 1:

 *** a/contrib/pgstattuple/pgstatindex.c
 --- b/contrib/pgstattuple/pgstatindex.c

 + /* 
 +  * Buffer access strategy for reading relations, it's simpler to keep it
 +  

Re: [HACKERS] Measuring relation free space

2012-02-13 Thread Jaime Casanova
On Wed, Jan 25, 2012 at 9:47 PM, Noah Misch n...@leadboat.com wrote:

 With all that done, run some quick benchmarks: see how SELECT free_percent
 FROM pgstattuple(rel) fares compared to SELECT relation_free_space(rel) for
 a large heap and for a large B-tree index.  If the timing difference is too
 small to be interesting to you, remove relation_free_space() and submit your
 pgstattuple() improvements alone.  Otherwise, submit as written.


Ok. I split this in three patches.

1) pgstattuple-gin_spgist.patch
This first patch adds gin and spgist support to pgstattuple, also
makes pgstattuple use a ring buffer when reading tables or indexes.

2) pgstattuple-relation_free_space.patch
This patch adds the relation_free_space function to pgstattuple.

the function relation_free_space() is faster than pgstattuple(), to
test that i initialize pgbench with a scale of 40.
In that context pgstattuple() tooks 1.4s to process pgbench_account
table and relation_free_space() tooks 730ms (half the time!)
In the index the difference is less notorious, 170ms the former and
150ms the latter.

3) pgstattuple-stats_target.patch
This patch adds a stats_target parameter to the relation_free_space()
function, it mimics the way analyze choose the blocks to read and is
faster than plain relation_free_space() but of course could be inexact
if the pages that we don't read are the ones with more free space

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c
index beff1b9..9f2ec1f 100644
*** a/contrib/pgstattuple/pgstatindex.c
--- b/contrib/pgstattuple/pgstatindex.c
*** pgstatindex(PG_FUNCTION_ARGS)
*** 95,100 
--- 95,101 
  	BlockNumber nblocks;
  	BlockNumber blkno;
  	BTIndexStat indexStat;
+  	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
  
  	if (!superuser())
  		ereport(ERROR,
*** pgstatindex(PG_FUNCTION_ARGS)
*** 122,128 
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBuffer(rel, 0);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
--- 123,129 
  	 * Read metapage
  	 */
  	{
! 		Buffer		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy);
  		Page		page = BufferGetPage(buffer);
  		BTMetaPageData *metad = BTPageGetMeta(page);
  
*** pgstatindex(PG_FUNCTION_ARGS)
*** 159,165 
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
! 		buffer = ReadBuffer(rel, blkno);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
--- 160,166 
  		CHECK_FOR_INTERRUPTS();
  
  		/* Read and lock buffer */
!  		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
  		LockBuffer(buffer, BUFFER_LOCK_SHARE);
  
  		page = BufferGetPage(buffer);
diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c
index e5ddd87..6bbc957 100644
*** a/contrib/pgstattuple/pgstattuple.c
--- b/contrib/pgstattuple/pgstattuple.c
***
*** 24,33 
--- 24,35 
  
  #include postgres.h
  
+ #include access/gin_private.h
  #include access/gist_private.h
  #include access/hash.h
  #include access/nbtree.h
  #include access/relscan.h
+ #include access/spgist_private.h
  #include catalog/namespace.h
  #include funcapi.h
  #include miscadmin.h
*** static void pgstat_hash_page(pgstattuple
*** 73,83 
--- 75,96 
   Relation rel, BlockNumber blkno);
  static void pgstat_gist_page(pgstattuple_type *stat,
   Relation rel, BlockNumber blkno);
+ static void pgstat_gin_page(pgstattuple_type *stat,
+  Relation rel, BlockNumber blkno);
+ static void pgstat_spgist_page(pgstattuple_type *stat,
+  Relation rel, BlockNumber blkno);
  static Datum pgstat_index(Relation rel, BlockNumber start,
  			 pgstat_page pagefn, FunctionCallInfo fcinfo);
  static void pgstat_index_page(pgstattuple_type *stat, Page page,
    OffsetNumber minoff, OffsetNumber maxoff);
  
+ /* 
+  * Buffer access strategy for reading relations, it's simpler to keep it
+  * global because pgstat_*_page() functions read one buffer at a time.
+  * pgstat_heap() and pgstat_index() should initialize it before use.
+  */
+ BufferAccessStrategy bstrategy;
+ 
  /*
   * build_pgstattuple_type -- build a pgstattuple_type tuple
   */
*** pgstat_relation(Relation rel, FunctionCa
*** 229,235 
  	return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
  		pgstat_gist_page, fcinfo);
  case GIN_AM_OID:
! 	err = gin index;
  	break;
  default:
  	err = unknown index;
--- 242,253 
  	return pgstat_index(rel, GIST_ROOT_BLKNO + 1,
  		pgstat_gist_page, fcinfo);
  case GIN_AM_OID:
! 	return pgstat_index(rel, GIN_METAPAGE_BLKNO + 1,
! 		pgstat_gin_page, fcinfo);
! 	break;
! case SPGIST_AM_OID:
! 	return pgstat_index(rel, 

Re: [HACKERS] Measuring relation free space

2012-01-25 Thread Noah Misch
On Tue, Jan 24, 2012 at 11:24:08AM -0500, Jaime Casanova wrote:
 On Mon, Jan 23, 2012 at 7:18 PM, Noah Misch n...@leadboat.com wrote:
  If someone feels like
  doing it, +1 for making pgstattuple() count non-leaf free space.
 
 actually i agreed that non-leaf pages are irrelevant... i just
 confirmed that in a production system with 300GB none of the indexes
 in an 84M rows table nor in a heavily updated one has more than 1 root
 page, all the rest are deleted, half_dead or leaf. so the posibility
 of bloat coming from non-leaf pages seems very odd

FWIW, the number to look at is internal_pages from pgstatindex():

[local] test=# create table t4 (c) as select * from generate_series(1,100);
SELECT 100
[local] test=# alter table t4 add primary key(c);
NOTICE:  ALTER TABLE / ADD PRIMARY KEY will create implicit index t4_pkey for 
table t4
ALTER TABLE
[local] test=# select * from pgstatindex('t4_pkey');
-[ RECORD 1 ]--+-
version| 2
tree_level | 2
index_size | 22478848
root_block_no  | 290
internal_pages | 10
leaf_pages | 2733
empty_pages| 0
deleted_pages  | 0
avg_leaf_density   | 90.06
leaf_fragmentation | 0

So, 0.4% of this index.  They appear in proportion to the logarithm of the
total index size.  I agree that bloat centered on them is unlikely.  Counting
them would be justified, but that is a question of formal accuracy rather than
practical importance.

 but the possibility of bloat coming from the meta page doesn't exist,
 AFAIUI at least
 
 we need the most accurate value about usable free space, because the
 idea is to add a sampler mode to the function so we don't scan the
 whole relation. that's why we still need the function.

I doubt we'd add this function solely on the basis that a future improvement
will make it useful.  For the patch to go in now, it needs to be useful now.
(This is not a universal principle, but it mostly holds for low-complexity
patches like this one.)

All my comments below would also apply to such a broader patch.

 btw... pgstattuple also has the problem that it's not using a ring buffer
 
 
 attached are two patches:
 - v5: is the same original patch but only track space in leaf, deleted
 and half_dead pages
 - v5.1: adds the same for all kind of indexes (problem is that this is
 inconsistent with the fact that pageinspect only manages btree indexes
 for everything else)

Let's take a step back.  Again, what you're proposing is essentially a faster
implementation of SELECT free_percent FROM pgstattuple(rel).  If this code
belongs in core at all, it belongs in the pgstattuple module.  Share as much
infrastructure as is reasonable between the user-visible functions of that
module.  For example, I'm suspecting that the pgstat_index() call tree should
be shared, with pgstat_index_page() checking a flag to decide whether to
gather per-tuple stats.

Next, compare the bits of code that differ between pgstattuple() and
relation_free_space(), convincing yourself that the differences are justified.
Each difference will yield one of the following conclusions:

1. Your code contains an innovation that would apply to both functions.  Where
not too difficult, merge these improvements into pgstattuple().  In order for
a demonstration of your new code's better performance to be interesting, we
must fix the same low-hanging fruit in its competitor.  One example is the use
of the bulk read strategy.  Another is the support for SP-GiST.

2. Your code is missing an essential behavior of pgstattuple().  Add it to
your code.  One example is the presence of CHECK_FOR_INTERRUPTS() calls.

3. Your code behaves differently from pgstattuple() due to a fundamental
difference in their tasks.  These are the only functional differences that
ought to remain in your finished patch; please point them out in comments.
For example, pgstat_heap() visits every tuple in the heap.  You'll have no
reason to do that; pgstattuple() only needs it to calculate statistics other
than free_percent.

In particular, I call your attention to the fact that pgstattuple() takes
shared buffer content locks before examining pages.  Your proposed patch does
not do so.  I do not know with certainty whether that falls under #1 or #2.
The broad convention is to take such locks, because we elsewhere want an exact
answer.  These functions are already inexact; they make no effort to observe a
consistent snapshot of the table.  If you convince yourself that the error
arising from not locking buffers is reasonably bounded, we can lose the locks
(in both functions -- conclusion #1).  Comments would then be in order.

With all that done, run some quick benchmarks: see how SELECT free_percent
FROM pgstattuple(rel) fares compared to SELECT relation_free_space(rel) for
a large heap and for a large B-tree index.  If the timing difference is too
small to be interesting to you, remove relation_free_space() and submit your
pgstattuple() improvements alone.  Otherwise, 

Re: [HACKERS] Measuring relation free space

2012-01-24 Thread Jaime Casanova
On Mon, Jan 23, 2012 at 7:18 PM, Noah Misch n...@leadboat.com wrote:
 On Mon, Jan 23, 2012 at 04:56:24PM -0300, Alvaro Herrera wrote:

 Hm.  Leaf pages hold as much tuples as non-leaf pages, no?  I mean
 for each page element there's a value and a CTID.  In non-leaf those
 CTIDs point to other index pages, one level down the tree; in leaf pages
 they point to the heap.

 That distinction seemed important when I sent my last message, but now I agree
 that it's largely irrelevant for free space purposes.  If someone feels like
 doing it, +1 for making pgstattuple() count non-leaf free space.


actually i agreed that non-leaf pages are irrelevant... i just
confirmed that in a production system with 300GB none of the indexes
in an 84M rows table nor in a heavily updated one has more than 1 root
page, all the rest are deleted, half_dead or leaf. so the posibility
of bloat coming from non-leaf pages seems very odd

but the possibility of bloat coming from the meta page doesn't exist,
AFAIUI at least

we need the most accurate value about usable free space, because the
idea is to add a sampler mode to the function so we don't scan the
whole relation. that's why we still need the function.

btw... pgstattuple also has the problem that it's not using a ring buffer


attached are two patches:
- v5: is the same original patch but only track space in leaf, deleted
and half_dead pages
- v5.1: adds the same for all kind of indexes (problem is that this is
inconsistent with the fact that pageinspect only manages btree indexes
for everything else)

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
new file mode 100644
index 13ba6d3..63fab95
*** a/contrib/pageinspect/Makefile
--- b/contrib/pageinspect/Makefile
*** MODULE_big	= pageinspect
*** 4,10 
  OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
  
  EXTENSION = pageinspect
! DATA = pageinspect--1.0.sql pageinspect--unpackaged--1.0.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
--- 4,12 
  OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
  
  EXTENSION = pageinspect
! DATA = pageinspect--1.0.sql pageinspect--1.1.sql \
!pageinspect--1.0--1.1.sql \
!pageinspect--unpackaged--1.0.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
new file mode 100644
index dbb2158..9c0b0fb
*** a/contrib/pageinspect/btreefuncs.c
--- b/contrib/pageinspect/btreefuncs.c
***
*** 34,39 
--- 34,40 
  #include utils/builtins.h
  #include utils/rel.h
  
+ #include btreefuncs.h
  
  extern Datum bt_metap(PG_FUNCTION_ARGS);
  extern Datum bt_page_items(PG_FUNCTION_ARGS);
*** GetBTPageStatistics(BlockNumber blkno, B
*** 155,160 
--- 156,216 
  		stat-avg_item_size = 0;
  }
  
+ /*
+  * GetBTRelationFreeSpace
+  *
+  * Get the free space for a btree index.
+  * This is a helper function for relation_free_space()
+  *
+  */
+ float4
+ GetBTRelationFreeSpace(Relation rel)
+ {
+ 	BTPageStat stat;
+ 
+ 	Buffer		buffer;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	BlockNumber totalBlcksCounted = 0;
+ 	Size 		free_space = 0;
+ 	double		free_percent = 0;
+ 
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 	
+ 	/* Skip page 0 because it is a metapage */
+ 	for (blkno = 1; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 		/* 
+ 		 * get the statistics of the indexes and use that info
+ 		 * to determine free space on the page
+ 		 */
+ 		GetBTPageStatistics(blkno, buffer, stat);
+ 		/* 
+ 		 * Consider pages DELETED and HALF_DEAD as empty,
+ 		 * besides those only consider LEAF pages
+ 		 */
+ 		if (stat.type == 'd' || stat.type == 'e')
+ 		{
+ 			free_space += stat.page_size;
+ 			totalBlcksCounted++;
+ 		}
+ 		else if (stat.type == 'l')
+ 		{
+ 			free_space += stat.free_size;		
+ 			totalBlcksCounted++;
+ 		}
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksCounted  0)
+ 		free_percent = ((float4) free_space) / (totalBlcksCounted * BLCKSZ);
+ 
+ 	return free_percent;
+ }
+ 
+ 
  /* ---
   * bt_page()
   *
diff --git a/contrib/pageinspect/btreefuncs.h b/contrib/pageinspect/btreefuncs.h
new file mode 100644
index ...549f878
*** a/contrib/pageinspect/btreefuncs.h
--- b/contrib/pageinspect/btreefuncs.h
***
*** 0 
--- 1,5 
+ /*
+  * contrib/pageinspect/btreefuncs.h
+  */
+ 
+ float4 GetBTRelationFreeSpace(Relation);
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index 260ccff..8e3961d
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c

Re: [HACKERS] Measuring relation free space

2012-01-23 Thread Alvaro Herrera

Excerpts from Noah Misch's message of vie ene 20 22:33:30 -0300 2012:
 On Fri, Jan 20, 2012 at 07:03:22PM -0500, Jaime Casanova wrote:
  On Wed, Jan 18, 2012 at 7:01 PM, Noah Misch n...@leadboat.com wrote:
   On Wed, Jan 18, 2012 at 09:46:20AM -0500, Jaime Casanova wrote:
  
   ignoring all non-leaf pages still gives a considerable difference
   between pgstattuple and relation_free_space()
  
   pgstattuple() counts the single B-tree meta page as always-full, while
   relation_free_space() skips it for all purposes. ?For tiny indexes, that 
   can
   shift the percentage dramatically.
  
  
  ok, i will reformulate the question. why is fine ignoring non-leaf
  pages but is not fine to ignore the meta page?
 
 pgstattuple() figures the free_percent by adding up all space available to
 hold tuples and dividing that by the simple size of the relation.  Non-leaf
 pages and the meta page get identical treatment: both never hold tuples, so
 they do not contribute to the free space.

Hm.  Leaf pages hold as much tuples as non-leaf pages, no?  I mean
for each page element there's a value and a CTID.  In non-leaf those
CTIDs point to other index pages, one level down the tree; in leaf pages
they point to the heap.

The metapage is special in that it is not used to store any user data,
just a pointer to the root page.

-- 
Álvaro Herrera alvhe...@commandprompt.com
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-23 Thread Noah Misch
On Mon, Jan 23, 2012 at 04:56:24PM -0300, Alvaro Herrera wrote:
 Excerpts from Noah Misch's message of vie ene 20 22:33:30 -0300 2012:
  pgstattuple() figures the free_percent by adding up all space available to
  hold tuples and dividing that by the simple size of the relation.  Non-leaf
  pages and the meta page get identical treatment: both never hold tuples, so
  they do not contribute to the free space.
 
 Hm.  Leaf pages hold as much tuples as non-leaf pages, no?  I mean
 for each page element there's a value and a CTID.  In non-leaf those
 CTIDs point to other index pages, one level down the tree; in leaf pages
 they point to the heap.

That distinction seemed important when I sent my last message, but now I agree
that it's largely irrelevant for free space purposes.  If someone feels like
doing it, +1 for making pgstattuple() count non-leaf free space.

Thanks,
nm

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-20 Thread Jaime Casanova
On Wed, Jan 18, 2012 at 7:01 PM, Noah Misch n...@leadboat.com wrote:
 On Wed, Jan 18, 2012 at 09:46:20AM -0500, Jaime Casanova wrote:

 ignoring all non-leaf pages still gives a considerable difference
 between pgstattuple and relation_free_space()

 pgstattuple() counts the single B-tree meta page as always-full, while
 relation_free_space() skips it for all purposes.  For tiny indexes, that can
 shift the percentage dramatically.


ok, i will reformulate the question. why is fine ignoring non-leaf
pages but is not fine to ignore the meta page?

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-20 Thread Noah Misch
On Fri, Jan 20, 2012 at 07:03:22PM -0500, Jaime Casanova wrote:
 On Wed, Jan 18, 2012 at 7:01 PM, Noah Misch n...@leadboat.com wrote:
  On Wed, Jan 18, 2012 at 09:46:20AM -0500, Jaime Casanova wrote:
 
  ignoring all non-leaf pages still gives a considerable difference
  between pgstattuple and relation_free_space()
 
  pgstattuple() counts the single B-tree meta page as always-full, while
  relation_free_space() skips it for all purposes. ?For tiny indexes, that can
  shift the percentage dramatically.
 
 
 ok, i will reformulate the question. why is fine ignoring non-leaf
 pages but is not fine to ignore the meta page?

pgstattuple() figures the free_percent by adding up all space available to
hold tuples and dividing that by the simple size of the relation.  Non-leaf
pages and the meta page get identical treatment: both never hold tuples, so
they do not contribute to the free space.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-18 Thread Jaime Casanova
On Mon, Jan 16, 2012 at 5:09 AM, Noah Misch n...@leadboat.com wrote:

 pgstattuple()'s decision to treat half-dead pages like deleted pages is
 better.  That transient state can only end in the page's deletion.


the only page in that index has 200 records (all live 0 dead) using
half the page size (which is a leaf page and is not half dead, btw).
so, how do you justify that pgstattuple say we have just 25% of free
space?

postgres=# SELECT * from bt_page_stats('pgbench_tellers_pkey', 1);
-[ RECORD 1 ]-+-
blkno  | 1
type   | l
live_items   | 200
dead_items | 0
avg_item_size | 16
page_size   | 8192
free_size | 4148
btpo_prev| 0
btpo_next| 0
btpo| 0
btpo_flags   | 3

 I don't know about counting non-leaf pages

ignoring all non-leaf pages still gives a considerable difference
between pgstattuple and relation_free_space()

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-18 Thread Noah Misch
On Wed, Jan 18, 2012 at 09:46:20AM -0500, Jaime Casanova wrote:
 On Mon, Jan 16, 2012 at 5:09 AM, Noah Misch n...@leadboat.com wrote:
 
  pgstattuple()'s decision to treat half-dead pages like deleted pages is
  better. ?That transient state can only end in the page's deletion.
 
 
 the only page in that index has 200 records (all live 0 dead) using
 half the page size (which is a leaf page and is not half dead, btw).
 so, how do you justify that pgstattuple say we have just 25% of free
 space?
 
 postgres=# SELECT * from bt_page_stats('pgbench_tellers_pkey', 1);
 -[ RECORD 1 ]-+-
 blkno  | 1
 type   | l
 live_items   | 200
 dead_items | 0
 avg_item_size | 16
 page_size   | 8192
 free_size | 4148
 btpo_prev| 0
 btpo_next| 0
 btpo| 0
 btpo_flags   | 3
 
  I don't know about counting non-leaf pages
 
 ignoring all non-leaf pages still gives a considerable difference
 between pgstattuple and relation_free_space()

pgstattuple() counts the single B-tree meta page as always-full, while
relation_free_space() skips it for all purposes.  For tiny indexes, that can
shift the percentage dramatically.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-16 Thread Noah Misch
On Sat, Jan 14, 2012 at 02:40:46PM -0500, Jaime Casanova wrote:
 On Sat, Jan 14, 2012 at 6:26 AM, Noah Misch n...@leadboat.com wrote:
 
  - pgstattuple() and relation_free_space() should emit the same number, even 
  if
  ?that means improving pgstattuple() at the same time.
 
 yes, i just wanted to understand which one was more accurate and
 why... and give the opportunity for anyone to point my error if any

pgstattuple()'s decision to treat half-dead pages like deleted pages is
better.  That transient state can only end in the page's deletion.

I don't know about counting non-leaf pages, but I personally wouldn't revisit
pgstattuple()'s decision there.  In the indexes I've briefly surveyed, the
ratio of leaf pages to non-leaf pages is 100:1 or better.  No amount of bloat
in that 1% will matter.  Feel free to make the argument if you think
otherwise, though; I've only taken a brief look at the topic.

  - relation_free_space() belongs in the pgstattuple extension, because its 
  role
  ?is cheaper access to a single pgstattuple() metric.
 
 oh! right! so, what about just fixing the free_percent that
 pgstattuple is providing

If pgstattuple() meets your needs, you might indeed no longer need any patch.

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-14 Thread Jaime Casanova
On Thu, Dec 15, 2011 at 4:11 PM, Noah Misch n...@leadboat.com wrote:
 On Sun, Nov 06, 2011 at 10:20:49PM +0100, Bernd Helmle wrote:
 --On 6. November 2011 01:08:11 -0200 Greg Smith g...@2ndquadrant.com wrote:

 Attached patch adds a new function to the pageinspect extension for 
 measuring
 total free space, in either tables or indexes.

 I wonder if that should be done in the pgstattuple module, which output
 some similar numbers.

 Indeed, pgstattuple already claims to show precisely the same measure.  Its
 reckoning is right in line for heaps, but the proposed pageinspect function
 finds more free space in indexes:

 [local] test=# SELECT t.free_percent, relation_free_space('pg_proc'), 
 i.free_percent, relation_free_space('pg_proc_proname_args_nsp_index') FROM 
 pgstattuple('pg_proc') t, pgstattuple('pg_proc_proname_args_nsp_index') i;
  free_percent | relation_free_space | free_percent | relation_free_space
 --+-+--+-
         2.53 |           0.0253346 |         8.61 |            0.128041
 (1 row)

 Is one of those index figures simply wrong, or do they measure two senses of
 free space, both of which are interesting to DBAs?


i created a test env using pgbench -s 20 -F 90, i then create a new
table (that keep tracks actions that happens the the pgbench tables,
so insert only) and changed a few fillfactors:

   relname   | reltuples|reloptions
-+ ---+--
 audit_log| 804977 |
 pgbench_accounts  |   1529890 | {fillfactor=90}
 pgbench_accounts_pkey |   1529890 | {fillfactor=50}
 pgbench_branches  |   20 | {fillfactor=100}
 pgbench_branches_pkey |   20 |
 pgbench_history |  94062 |
 pgbench_tellers  |   200   | {fillfactor=100}
 pgbench_tellers_pkey |   200|
(8 rows)


and after running pgbench -n -c 4 -j 2 -T 300 a few times, i used
attached free_space.sql to see what pg_freespacemap, pgstattuple and
relation_free_space had to say about these tables. the result is
attached in result_free_space.out

my first conclusion is that pg_freespacemap is unreliable when indexes
are involved (and looking at the documentation of that module confirms
that), also the fact that FSM is not designed for accuracy make me
think is not an option.

pgstattuple and relation_free_space are very close in all the numbers
except for 2 indexes pgbench_branches_pkey and pgbench_tellers_pkey;
after a VACUUM FULL and  a REINDEX (and the difference persistence) i
checked pgbench_tellers_pkey contents (it has only one page besides
the metapage) and the numbers that i look at where:

page size: 8192
free size:  4148

which in good romance means 50% of free space... so, answering Noah's
question: if that difference has some meaning i can't see it but
looking at the evidence the measure relation_free_space() is giving is
the good one

so, tomorrow (or ...looking at the clock... later today) i will update
the relation_free_space() patch to accept toast tables and other kind
of indexes and add it to the commitfest unless someone says that my
math is wrong and somehow there is a more accurate way of getting the
free space (which is entirely possible)

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
with relation(relid, relname, relpages) as (
	select oid, relname, relpages 
	  from pg_class 
	 where relkind in ('r', 'i') and (relname like 'pgbench%' or relname = 'audit_log')
),
q(relid, relname, total_size, free_size) as (
	select relid, relname, pg_relation_size(relid::regclass), (select sum(avail) from pg_freespace(relid::regclass)) 
	  from relation
)
select relname, total_size, round((free_size::numeric / total_size), 6) as fsm_free_size, relation_free_space(relid::regclass::text),
   ((pgstattuple(relid)).free_percent / 100) pgstattuple_free_pct
  from q
 order by 1;


result_free_space.out
Description: Binary data

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-14 Thread Noah Misch
On Sat, Jan 14, 2012 at 04:41:57AM -0500, Jaime Casanova wrote:
 pgstattuple and relation_free_space are very close in all the numbers
 except for 2 indexes pgbench_branches_pkey and pgbench_tellers_pkey;
 after a VACUUM FULL and  a REINDEX (and the difference persistence) i
 checked pgbench_tellers_pkey contents (it has only one page besides
 the metapage) and the numbers that i look at where:
 
 page size: 8192
 free size:  4148
 
 which in good romance means 50% of free space... so, answering Noah's
 question: if that difference has some meaning i can't see it but
 looking at the evidence the measure relation_free_space() is giving is
 the good one
 
 so, tomorrow (or ...looking at the clock... later today) i will update
 the relation_free_space() patch to accept toast tables and other kind
 of indexes and add it to the commitfest unless someone says that my
 math is wrong and somehow there is a more accurate way of getting the
 free space (which is entirely possible)

Did you see this followup[1]?  To summarize:

- pgstattuple() and relation_free_space() should emit the same number, even if
  that means improving pgstattuple() at the same time.
- relation_free_space() belongs in the pgstattuple extension, because its role
  is cheaper access to a single pgstattuple() metric.

Thanks,
nm

[1] 
http://archives.postgresql.org/message-id/20111218165625.gb6...@tornado.leadboat.com

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2012-01-14 Thread Jaime Casanova
On Sat, Jan 14, 2012 at 6:26 AM, Noah Misch n...@leadboat.com wrote:

 - pgstattuple() and relation_free_space() should emit the same number, even if
  that means improving pgstattuple() at the same time.

yes, i just wanted to understand which one was more accurate and
why... and give the opportunity for anyone to point my error if any

 - relation_free_space() belongs in the pgstattuple extension, because its role
  is cheaper access to a single pgstattuple() metric.


oh! right! so, what about just fixing the free_percent that
pgstattuple is providing

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-12-18 Thread Noah Misch
On Fri, Dec 16, 2011 at 02:02:03AM -0500, Greg Smith wrote:
 On 12/15/2011 04:11 PM, Noah Misch wrote:
 Is one of those index figures simply wrong, or do they measure two senses of
 free space, both of which are interesting to DBAs?

 I think the bigger one--the one I was aiming to measure--also includes  
 fill-factor space.  It should be possible to isolate whether that's true  
 by running the function against a fresh index, or by trying tests with a  
 table where there's no useful fill.  I need to add some of those to the  
 test example suite.

No, both measures include fillfactor space.  From a brief look at the code, the
proposed function counts space in non-leaf pages, while pgstattuple does not.
Also, the proposed function counts half-dead pages like live pages, while
pgstattuple counts them like dead pages.

One could perhaps justify those choices either way, but they seem too esoteric
for DBA exposure.  I recommend choosing a policy on each and making both
pgstattuple() and any new code respect that policy.

 Shaking out the alternate implementation ideas was really my goal for  
 this CF here.  The major goal of the next revision is to present the  
 options with a measure of their respective accuracy and runtime.  If I  
 have to give up just a of bit of accuracy and make it much faster,  
 that's probably what most people want as an option.  When Jaime and I  
 come back with an update, it really needs to have benchmarks and  
 accuracy numbers for each option.  That may be complicated a bit  
 depending on how much of the table or index is cached, so isolating that  
 out will be a pain.

The previous submission seemed to boil down to a speedier version of SELECT
free_percent FROM pgstattuple('foo').  (Some of the other statistics aren't
cheap.)  Considering that, the code does belong in the pgstattuple module.

The sampling approach you have mentioned sounds promising, especially for
indexes.  For heap bloat, it may be hard to improve on pg_freespacemap-based and
check_postgres-style estimates with anything less than a full heap scan.

Thanks,
nm

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-12-15 Thread Greg Smith

On 11/28/2011 05:40 AM, Greg Smith wrote:
Ignoring fillfactor seems to have even more downsides as I see it.  
Certainly deserves a doc improvement, as well as fixing the 
description of the value so it's clearly a ratio rather than a true 
percentage.


So:  I'm very clear on what to do here now:

-Make the computation be in units that match it documetnation
-Take a look at other index types, as well as TOAST, at least to get the 
easy ones right.

-Fully confirm the extension upgrade logic works as hoped

That's the must do stuff.  Then there's two more features to consider 
and do something with if sensible:


-Double check whether there is really a useful role in using 
pg_freespace here.  I don't think the numbers will be as good, but maybe 
we don't care.
-Once the above is all sorted, add a second UI that samples some pages 
and extrapolates, ANALYZE-style, rather than hitting everything.


This ones leaves as returned with feedback, feeling pretty good it will 
be whipped into good shape for the last 9.2 CommitFest.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-12-15 Thread Noah Misch
On Sun, Nov 06, 2011 at 10:20:49PM +0100, Bernd Helmle wrote:
 --On 6. November 2011 01:08:11 -0200 Greg Smith g...@2ndquadrant.com wrote:

 Attached patch adds a new function to the pageinspect extension for measuring
 total free space, in either tables or indexes.

 I wonder if that should be done in the pgstattuple module, which output 
 some similar numbers.

Indeed, pgstattuple already claims to show precisely the same measure.  Its
reckoning is right in line for heaps, but the proposed pageinspect function
finds more free space in indexes:

[local] test=# SELECT t.free_percent, relation_free_space('pg_proc'), 
i.free_percent, relation_free_space('pg_proc_proname_args_nsp_index') FROM 
pgstattuple('pg_proc') t, pgstattuple('pg_proc_proname_args_nsp_index') i;
 free_percent | relation_free_space | free_percent | relation_free_space 
--+-+--+-
 2.53 |   0.0253346 | 8.61 |0.128041
(1 row)

Is one of those index figures simply wrong, or do they measure two senses of
free space, both of which are interesting to DBAs?

Thanks,
nm

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-12-15 Thread Greg Smith

On 12/15/2011 04:11 PM, Noah Misch wrote:

Is one of those index figures simply wrong, or do they measure two senses of
free space, both of which are interesting to DBAs?
   


I think the bigger one--the one I was aiming to measure--also includes 
fill-factor space.  It should be possible to isolate whether that's true 
by running the function against a fresh index, or by trying tests with a 
table where there's no useful fill.  I need to add some of those to the 
test example suite.


While in theory both measures of free space might be interesting to 
DBAs, I'd prefer to have the one that reflects the lost space to 
fill-factor if I'm checking an index.  But as Robert Treat was pointing 
out, even the very rough estimates being made with existing user-space 
tools not using the contrib module features are helpful enough for a lot 
of users.  So long as it's easy and accuracy is good enough to find 
bloated indexes, either implementation is probably good enough.


Shaking out the alternate implementation ideas was really my goal for 
this CF here.  The major goal of the next revision is to present the 
options with a measure of their respective accuracy and runtime.  If I 
have to give up just a of bit of accuracy and make it much faster, 
that's probably what most people want as an option.  When Jaime and I 
come back with an update, it really needs to have benchmarks and 
accuracy numbers for each option.  That may be complicated a bit 
depending on how much of the table or index is cached, so isolating that 
out will be a pain.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-12-07 Thread Jaime Casanova
On Mon, Nov 28, 2011 at 5:40 AM, Greg Smith g...@2ndquadrant.com wrote:

 Unless I am missing something, all indexes are handled via a procedure
 designed for BTree indices, GetBTRelationFreeSpace.  I don't know
 that the ultimate behavior of this is wrong, but it seems unusual.  If
 I get some more time, I will try to explore what is actually going on
 when called on other types of indexes.


 This I think I'll punt back toward Jaime, as well as asking did you have a
 plan for TOAST here?


for indexes. it seems pageinspect only deals with btree indexes and i
neglected to put a similar limitation on this function... now, because
the free space is calculated using PageGetFreeSpace() for indexes it
should be doing the right thing for all kind of indexes, i only put
the function there because i was trying to avoid to create a new file.
But if the function is right for all kind of indexes that's maybe
enough to create a new file and rename the helper function so is
obvious that it can manage all kind of indexes

for toast tables. a simple test here seems to show that is as easy as
to add toast tables in the supported objects and treat them as normal
pages...

or there is something i'm missing?

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-28 Thread Greg Smith

On 11/25/2011 04:42 PM, Jeff Janes wrote:

It reports space that is free exclusively for updates as being free.
In other words, it considers space free even if it is reserved against
inserts in deference to fillfactor.  This is in contrast to
pg_freespace, which only reports space available for inserts as being
available.  I think this is reasonable behavior, but it is subtle and
should perhaps be documented.


Ah, that's right, this is why I first wandered this specific path.  
Ignoring fillfactor seems to have even more downsides as I see it.  
Certainly deserves a doc improvement, as well as fixing the description 
of the value so it's clearly a ratio rather than a true percentage.



(Is it common to use fill factors other
than the default in the first place?  Do we assume that people using
fillfactor are sophisticated enough not to shot themselves in the
foot?)


It's not common, and I think anyone who sets fillfactor themselves would 
understand the downside.  The bigger risk are people who inherit designs 
from others that use this feature, but the new person doesn't understand 
it.  If using this feature calls attention to a problem there that 
prompts an investigation, I'd see that as a good thing, rather than a 
foot shot.



Unless I am missing something, all indexes are handled via a procedure
designed for BTree indices, GetBTRelationFreeSpace.  I don't know
that the ultimate behavior of this is wrong, but it seems unusual.  If
I get some more time, I will try to explore what is actually going on
when called on other types of indexes.


This I think I'll punt back toward Jaime, as well as asking did you 
have a plan for TOAST here?



--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-25 Thread Jeff Janes
On Mon, Nov 14, 2011 at 2:02 PM, Jaime Casanova ja...@2ndquadrant.com wrote:
 On Wed, Nov 9, 2011 at 7:58 AM, Alvaro Herrera
 alvhe...@commandprompt.com wrote:

 Excerpts from Jaime Casanova's message of mar nov 08 18:12:25 -0300 2011:
 On Sun, Nov 6, 2011 at 5:38 AM, Magnus Hagander mag...@hagander.net wrote:
 
  Looks pretty useful.

 thanks for the review, attached is a new version of it

 Note that AFAIK you shouldn't update the 1.0 extension script ... you
 have to create a 1.1 version (or whatever), update the default version
 in the control file, and create an 1.0--1.1 script to upgrade from the
 original version to 1.1.


 good point... fixed that...
 a question i have is: are we supposed to let the old script (1.0) around?

Since the syntax to install a non-default version is supported, I
would argue the old script should be kept.
CREATE extension pageinspect with version 1.0

This patch applies and builds cleanly.  It works either for CREATE
EXTENSION from scratch, or for updating from the prior version with
ALTER EXTENSION..UPDATE.

It seems to be using the buffer ring strategy as advertised.

It reports space that is free exclusively for updates as being free.
In other words, it considers space free even if it is reserved against
inserts in deference to fillfactor.  This is in contrast to
pg_freespace, which only reports space available for inserts as being
available.  I think this is reasonable behavior, but it is subtle and
should perhaps be documented.  (Is it common to use fill factors other
than the default in the first place?  Do we assume that people using
fillfactor are sophisticated enough not to shot themselves in the
foot?)

As noted by Greg, the documentation calls it total amount of free
free [sic] space when that is not what is reported.  However, it also
is not reporting a percentage, but rather a decimal fraction.  The
reported value should be multiplied by 100, especially if the docs are
going to be changed to call it a percentage.

Unless I am missing something, all indexes are handled via a procedure
designed for BTree indices, GetBTRelationFreeSpace.  I don't know
that the ultimate behavior of this is wrong, but it seems unusual.  If
I get some more time, I will try to explore what is actually going on
when called on other types of indexes.

I have no insight into how to handle toast tables, or non-superusers.
I had thought that toast tables had names of their own which could be
used, but I could not figure out how to do that.

Even if there are other ways to get approximately the same
information, this functionality seems to be a natural thing to have in
the pageinspect extension.

Cheers,

Jeff

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-14 Thread Jaime Casanova
On Wed, Nov 9, 2011 at 7:58 AM, Alvaro Herrera
alvhe...@commandprompt.com wrote:

 Excerpts from Jaime Casanova's message of mar nov 08 18:12:25 -0300 2011:
 On Sun, Nov 6, 2011 at 5:38 AM, Magnus Hagander mag...@hagander.net wrote:
 
  Looks pretty useful.

 thanks for the review, attached is a new version of it

 Note that AFAIK you shouldn't update the 1.0 extension script ... you
 have to create a 1.1 version (or whatever), update the default version
 in the control file, and create an 1.0--1.1 script to upgrade from the
 original version to 1.1.


good point... fixed that...
a question i have is: are we supposed to let the old script (1.0) around?

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
new file mode 100644
index 13ba6d3..63fab95
*** a/contrib/pageinspect/Makefile
--- b/contrib/pageinspect/Makefile
*** MODULE_big	= pageinspect
*** 4,10 
  OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
  
  EXTENSION = pageinspect
! DATA = pageinspect--1.0.sql pageinspect--unpackaged--1.0.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
--- 4,12 
  OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o
  
  EXTENSION = pageinspect
! DATA = pageinspect--1.0.sql pageinspect--1.1.sql \
!pageinspect--1.0--1.1.sql \
!pageinspect--unpackaged--1.0.sql
  
  ifdef USE_PGXS
  PG_CONFIG = pg_config
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
new file mode 100644
index dbb2158..8be21ed
*** a/contrib/pageinspect/btreefuncs.c
--- b/contrib/pageinspect/btreefuncs.c
***
*** 34,39 
--- 34,40 
  #include utils/builtins.h
  #include utils/rel.h
  
+ #include btreefuncs.h
  
  extern Datum bt_metap(PG_FUNCTION_ARGS);
  extern Datum bt_page_items(PG_FUNCTION_ARGS);
*** GetBTPageStatistics(BlockNumber blkno, B
*** 155,160 
--- 156,204 
  		stat-avg_item_size = 0;
  }
  
+ /*
+  * GetBTRelationFreeSpace
+  *
+  * Get the free space for a btree index.
+  * This is a helper function for relation_free_space()
+  *
+  */
+ float4
+ GetBTRelationFreeSpace(Relation rel)
+ {
+ 	BTPageStat stat;
+ 
+ 	Buffer		buffer;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	Size 		free_space = 0;
+ 	double		free_percent = 0;
+ 
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 	
+ 	/* Skip page 0 because it is a metapage */
+ 	for (blkno = 1; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 		/* 
+ 		 * get the statistics of the indexes and use that info
+ 		 * to determine free space on the page
+ 		 */
+ 		GetBTPageStatistics(blkno, buffer, stat);
+ 		if (stat.type == 'd')
+ 			free_space += stat.page_size;
+ 		else
+ 			free_space += stat.free_size;		
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksInRelation  1)
+ 		free_percent = ((float4) free_space) / ((totalBlcksInRelation - 1) * BLCKSZ);
+ 	return free_percent;
+ }
+ 
+ 
  /* ---
   * bt_page()
   *
diff --git a/contrib/pageinspect/btreefuncs.h b/contrib/pageinspect/btreefuncs.h
new file mode 100644
index ...549f878
*** a/contrib/pageinspect/btreefuncs.h
--- b/contrib/pageinspect/btreefuncs.h
***
*** 0 
--- 1,5 
+ /*
+  * contrib/pageinspect/btreefuncs.h
+  */
+ 
+ float4 GetBTRelationFreeSpace(Relation);
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index fa50655..e7436fb
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c
***
*** 28,33 
--- 28,36 
  #include funcapi.h
  #include utils/builtins.h
  #include miscadmin.h
+ #include storage/bufmgr.h
+ 
+ #include heapfuncs.h
  
  Datum		heap_page_items(PG_FUNCTION_ARGS);
  
*** bits_to_text(bits8 *bits, int len)
*** 55,60 
--- 58,96 
  }
  
  
+ /*
+  * GetHeapRelationFreeSpace()
+  *
+  * Get the free space for a heap relation.
+  * This is a helper function for relation_free_space()
+  */
+ float4
+ GetHeapRelationFreeSpace(Relation rel)
+ {
+ 	Buffer  buffer;
+ 	Page		page;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	Sizefree_space = 0;
+ 	double		free_percent = 0;
+ 
+ 	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 
+ 	for (blkno = 0; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 		page   = BufferGetPage(buffer);
+ 		free_space += PageGetHeapFreeSpace(page);
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksInRelation  0)
+ 		free_percent = ((float4) free_space) / (totalBlcksInRelation * BLCKSZ);
+ 	return 

Re: [HACKERS] Measuring relation free space

2011-11-09 Thread Alvaro Herrera

Excerpts from Jaime Casanova's message of mar nov 08 18:12:25 -0300 2011:
 On Sun, Nov 6, 2011 at 5:38 AM, Magnus Hagander mag...@hagander.net wrote:
 
  Looks pretty useful.
 
 thanks for the review, attached is a new version of it

Note that AFAIK you shouldn't update the 1.0 extension script ... you
have to create a 1.1 version (or whatever), update the default version
in the control file, and create an 1.0--1.1 script to upgrade from the
original version to 1.1.

-- 
Álvaro Herrera alvhe...@commandprompt.com
The PostgreSQL Company - Command Prompt, Inc.
PostgreSQL Replication, Consulting, Custom Development, 24x7 support

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-09 Thread Robert Treat
On Tue, Nov 8, 2011 at 7:19 PM, Greg Smith g...@2ndquadrant.com wrote:
 On 11/08/2011 05:07 PM, Robert Treat wrote:

 It's already easy to get good enough numbers based on user space
 tools with very little overhead, so I think it's more important that
 the server side tool be accurate rather than fast.

 What user space method do you consider good enough here?  I haven't found
 any approximation that I was really happy with; wouldn't have bothered with
 this otherwise.


check_postgres and the pg_bloat_report both use a method of comparing
on disk size vs estimated size based on table structure (or index
info). Run regularly, it's certainly possible to keep bloat under
control. That said, I'd still like to see something more accurate.


Robert Treat
conjecture: xzilla.net
consulting: omniti.com

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-08 Thread Greg Smith

On 11/06/2011 11:55 PM, Mark Kirkwood wrote:
I am guessing (at this point very much guessing) that pg_freespace map 
may return its data faster, as pageinspect is gonna have to grovel 
through all the pages for itself (whereas pg_freespacemap relies on 
using info from the ... free space map fork).


I started with pageinspect because I wasn't sure if other methods would 
be as accurate.  For example, I wasn't sure until just before submission 
that only the free space and size of the relation are needed to get a 
useful measure here; at one point I was considering some other things 
too.  I've ruled them out as unnecessary as far as I can tell, but I 
can't claim my tests are exhaustive.  Some review confirmation that this 
measure is useful for other people is one thing I was hoping for 
feedback on, as one thing to consider in addition to the actual 
implementation.


If this measurement is the only one needed, than as I said at the start 
of the thread here it might easily be implemented to run just against 
the free space map instead.  I'm thinking of what's been sent so far as 
a UI with matching output it should produce.  If it's possible to get 
the same numbers faster, exactly how to implement the function under the 
hood is easy enough to change.  Jaime already has a new version in 
development that adds a ring buffer for example.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us



Re: [HACKERS] Measuring relation free space

2011-11-08 Thread Jaime Casanova
On Sun, Nov 6, 2011 at 5:38 AM, Magnus Hagander mag...@hagander.net wrote:

 Looks pretty useful.


thanks for the review, attached is a new version of it

 One quick stylistic comment - we don't generally use * 1.0 to turn
 an int into a double - just use a cast.


ok


 Hooking into a ring buffer seems like an almost requirement before you
 can run this on a larger production system, wouldn't it? I don't know
 how  hard that is code-wise, but it certainly seems worthwhile.


seems it wasn't too difficult... i just have to indicate the right
buffer access strategy so it's using a ring buffer now

-- 
Jaime Casanova         www.2ndQuadrant.com
Professional PostgreSQL: Soporte 24x7 y capacitación
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
new file mode 100644
index dbb2158..8be21ed
*** a/contrib/pageinspect/btreefuncs.c
--- b/contrib/pageinspect/btreefuncs.c
***
*** 34,39 
--- 34,40 
  #include utils/builtins.h
  #include utils/rel.h
  
+ #include btreefuncs.h
  
  extern Datum bt_metap(PG_FUNCTION_ARGS);
  extern Datum bt_page_items(PG_FUNCTION_ARGS);
*** GetBTPageStatistics(BlockNumber blkno, B
*** 155,160 
--- 156,204 
  		stat-avg_item_size = 0;
  }
  
+ /*
+  * GetBTRelationFreeSpace
+  *
+  * Get the free space for a btree index.
+  * This is a helper function for relation_free_space()
+  *
+  */
+ float4
+ GetBTRelationFreeSpace(Relation rel)
+ {
+ 	BTPageStat stat;
+ 
+ 	Buffer		buffer;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	Size 		free_space = 0;
+ 	double		free_percent = 0;
+ 
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 	
+ 	/* Skip page 0 because it is a metapage */
+ 	for (blkno = 1; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 		/* 
+ 		 * get the statistics of the indexes and use that info
+ 		 * to determine free space on the page
+ 		 */
+ 		GetBTPageStatistics(blkno, buffer, stat);
+ 		if (stat.type == 'd')
+ 			free_space += stat.page_size;
+ 		else
+ 			free_space += stat.free_size;		
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksInRelation  1)
+ 		free_percent = ((float4) free_space) / ((totalBlcksInRelation - 1) * BLCKSZ);
+ 	return free_percent;
+ }
+ 
+ 
  /* ---
   * bt_page()
   *
diff --git a/contrib/pageinspect/btreefuncs.h b/contrib/pageinspect/btreefuncs.h
new file mode 100644
index ...549f878
*** a/contrib/pageinspect/btreefuncs.h
--- b/contrib/pageinspect/btreefuncs.h
***
*** 0 
--- 1,5 
+ /*
+  * contrib/pageinspect/btreefuncs.h
+  */
+ 
+ float4 GetBTRelationFreeSpace(Relation);
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index fa50655..e7436fb
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c
***
*** 28,33 
--- 28,36 
  #include funcapi.h
  #include utils/builtins.h
  #include miscadmin.h
+ #include storage/bufmgr.h
+ 
+ #include heapfuncs.h
  
  Datum		heap_page_items(PG_FUNCTION_ARGS);
  
*** bits_to_text(bits8 *bits, int len)
*** 55,60 
--- 58,96 
  }
  
  
+ /*
+  * GetHeapRelationFreeSpace()
+  *
+  * Get the free space for a heap relation.
+  * This is a helper function for relation_free_space()
+  */
+ float4
+ GetHeapRelationFreeSpace(Relation rel)
+ {
+ 	Buffer  buffer;
+ 	Page		page;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	Sizefree_space = 0;
+ 	double		free_percent = 0;
+ 
+ 	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ 
+ 	for (blkno = 0; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy);
+ 		page   = BufferGetPage(buffer);
+ 		free_space += PageGetHeapFreeSpace(page);
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksInRelation  0)
+ 		free_percent = ((float4) free_space) / (totalBlcksInRelation * BLCKSZ);
+ 	return free_percent;
+ }
+ 
+ 
  /*
   * heap_page_items
   *
diff --git a/contrib/pageinspect/heapfuncs.h b/contrib/pageinspect/heapfuncs.h
new file mode 100644
index ...17b7cb3
*** a/contrib/pageinspect/heapfuncs.h
--- b/contrib/pageinspect/heapfuncs.h
***
*** 0 
--- 1,5 
+ /*
+  * contrib/pageinspect/heapfuncs.h
+  */
+ 
+ float4 GetHeapRelationFreeSpace(Relation);
diff --git a/contrib/pageinspect/pageinspect--1.0.sql b/contrib/pageinspect/pageinspect--1.0.sql
new file mode 100644
index 5613956..4502a13
*** a/contrib/pageinspect/pageinspect--1.0.sql
--- b/contrib/pageinspect/pageinspect--1.0.sql
*** CREATE FUNCTION fsm_page_contents(IN pag
*** 105,107 
--- 105,115 
  RETURNS text
  AS 'MODULE_PATHNAME', 

Re: [HACKERS] Measuring relation free space

2011-11-08 Thread Robert Treat
On Tue, Nov 8, 2011 at 1:07 PM, Greg Smith g...@2ndquadrant.com wrote:
 On 11/06/2011 11:55 PM, Mark Kirkwood wrote:

 I am guessing (at this point very much guessing) that pg_freespace map may
 return its data faster, as pageinspect is gonna have to grovel through all
 the pages for itself (whereas pg_freespacemap relies on using info from the
 ... free space map fork).

 I started with pageinspect because I wasn't sure if other methods would be
 as accurate.  For example, I wasn't sure until just before submission that
 only the free space and size of the relation are needed to get a useful
 measure here; at one point I was considering some other things too.  I've
 ruled them out as unnecessary as far as I can tell, but I can't claim my
 tests are exhaustive.  Some review confirmation that this measure is useful
 for other people is one thing I was hoping for feedback on, as one thing to
 consider in addition to the actual implementation.

 If this measurement is the only one needed, than as I said at the start of
 the thread here it might easily be implemented to run just against the free
 space map instead.  I'm thinking of what's been sent so far as a UI with
 matching output it should produce.  If it's possible to get the same numbers
 faster, exactly how to implement the function under the hood is easy enough
 to change.  Jaime already has a new version in development that adds a ring
 buffer for example.

It's already easy to get good enough numbers based on user space
tools with very little overhead, so I think it's more important that
the server side tool be accurate rather than fast. Of course, if we
can get both, that's a bonus, but I'd rather not go that route at the
expense of accuracy. Just my .02.

Robert Treat
conjecture: xzilla.net
consulting: omniti.com

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-08 Thread Greg Smith

On 11/08/2011 05:07 PM, Robert Treat wrote:

It's already easy to get good enough numbers based on user space
tools with very little overhead, so I think it's more important that
the server side tool be accurate rather than fast.


What user space method do you consider good enough here?  I haven't 
found any approximation that I was really happy with; wouldn't have 
bothered with this otherwise.


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-06 Thread Magnus Hagander
On Sun, Nov 6, 2011 at 04:08, Greg Smith g...@2ndquadrant.com wrote:
 Attached patch adds a new function to the pageinspect extension for
 measuring total free space, in either tables or indexes.  It returns the
 free space as a percentage, so higher numbers mean more bloat.  After trying
 a couple of ways to quantify it, I've found this particular measure
 correlates well with the nastiest bloat issues I've ran into in production
 recently.  For example, an index that had swelled to over 5X its original
 size due to autovacuum issues registered at 0.86 on this scale.  I could
 easily see people putting an alert at something like 0.40 and picking
 candidates to reindex based on it triggering.  That would be about a million
 times smarter than how I've been muddling through this class of problems so
 far.

 Code by Jaime Casanova, based on a prototype by me.  Thanks to attendees and
 sponsors of the PgWest conference for helping to fund some deeper
 exploration of this idea.

Looks pretty useful.

One quick stylistic comment - we don't generally use * 1.0 to turn
an int into a double - just use a cast.


 -Given this is doing a full table scan, should it hook into a ring buffer to
 keep from trashing the buffer cache?  Or might it loop over the relation in
 a different way all together?  I was thinking about eyeing the FSM instead
 at one point, didn't explore that yet.  There's certainly a few ways to
 approach this, we just aimed at the easiest way to get a working starter
 implementation, and associated results to compare others against.

Hooking into a ring buffer seems like an almost requirement before you
can run this on a larger production system, wouldn't it? I don't know
how  hard that is code-wise, but it certainly seems worthwhile.


 -Should there be a non-superuser version of this?  We'd certainly need to
 get a less cache demolishing version before that would seem wise.

Not sure that's necessary - at least not for now. Many other
diagnostics functions are already superuser only...


 -There were related things in the pageinspect module, but a case could be
 made for this being a core function instead.  It's a bit more likely to be
 used in production than the rest of that extension.

A case can be made for a lot of things in contrib to be in core ;) I
say let's keep it in pageinspect, but then also have you finish off
that split up the contrib patch :-)


 -What if anything related to TOAST should this handle?

Similar data for TOAST relations would be intersting, no? But that's
easily done from userspace by just querying to the toast table
specifically, I assume?


-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-06 Thread Bernd Helmle



--On 6. November 2011 01:08:11 -0200 Greg Smith g...@2ndquadrant.com wrote:


Attached patch adds a new function to the pageinspect extension for measuring
total free space, in either tables or indexes.


I wonder if that should be done in the pgstattuple module, which output some 
similar numbers.


--
Thanks

Bernd

--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] Measuring relation free space

2011-11-06 Thread Mark Kirkwood

On 07/11/11 10:20, Bernd Helmle wrote:



--On 6. November 2011 01:08:11 -0200 Greg Smith g...@2ndquadrant.com 
wrote:


Attached patch adds a new function to the pageinspect extension for 
measuring

total free space, in either tables or indexes.


I wonder if that should be done in the pgstattuple module, which 
output some similar numbers.




Not meaning to disparage Greg's effort in any way, but I was thinking 
the same thing about pg_freespacemap. I have not checked what - if any 
differences there are in output, but it would be interesting to compare 
which of the various (3 at present) extensions with slightly overlapping 
areas of functionality should perhaps be merged.


I am guessing (at this point very much guessing) that pg_freespace map 
may return its data faster, as pageinspect is gonna have to grovel 
through all the pages for itself (whereas pg_freespacemap relies on 
using info from the ... free space map fork).


regards

Mark




[HACKERS] Measuring relation free space

2011-11-05 Thread Greg Smith
Attached patch adds a new function to the pageinspect extension for 
measuring total free space, in either tables or indexes.  It returns the 
free space as a percentage, so higher numbers mean more bloat.  After 
trying a couple of ways to quantify it, I've found this particular 
measure correlates well with the nastiest bloat issues I've ran into in 
production recently.  For example, an index that had swelled to over 5X 
its original size due to autovacuum issues registered at 0.86 on this 
scale.  I could easily see people putting an alert at something like 
0.40 and picking candidates to reindex based on it triggering.  That 
would be about a million times smarter than how I've been muddling 
through this class of problems so far.


Code by Jaime Casanova, based on a prototype by me.  Thanks to attendees 
and sponsors of the PgWest conference for helping to fund some deeper 
exploration of this idea.


Here's a test case showing it in action:

create extension pageinspect;
create table t (k serial,v integer);
insert into t(v) (select generate_series(1,10));
create index t_idx on t(k);
delete from t where k5;
vacuum t;

gsmith=# select relation_free_space('t');
 relation_free_space
-
0.445466

gsmith=# select relation_free_space('t_idx');
 relation_free_space
-
0.550946

Some open questions in my mind:

-Given this is doing a full table scan, should it hook into a ring 
buffer to keep from trashing the buffer cache?  Or might it loop over 
the relation in a different way all together?  I was thinking about 
eyeing the FSM instead at one point, didn't explore that yet.  There's 
certainly a few ways to approach this, we just aimed at the easiest way 
to get a working starter implementation, and associated results to 
compare others against.


-Should there be a non-superuser version of this?  We'd certainly need 
to get a less cache demolishing version before that would seem wise.


-There were related things in the pageinspect module, but a case could 
be made for this being a core function instead.  It's a bit more likely 
to be used in production than the rest of that extension.


-What if anything related to TOAST should this handle?

We're also planning to do a sampling version of this, using the same 
approach ANALYZE does.  Grab a number of blocks, extrapolate from 
there.  It shouldn't take many samples before the accuracy is better 
than how people are estimated this now.  That work is just waiting on 
some better thinking about how to handle the full relation version first.


And, yes, the explanation in the docs and code should be clear that it's 
returning a percentage, which I just realized when writing this.  At 
least I remembered to document something; still ahead of the average new 
patch...


--
Greg Smith   2ndQuadrant USg...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support  www.2ndQuadrant.us

diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index dbb2158..aac9148 100644
*** a/contrib/pageinspect/btreefuncs.c
--- b/contrib/pageinspect/btreefuncs.c
***
*** 34,39 
--- 34,40 
  #include utils/builtins.h
  #include utils/rel.h
  
+ #include btreefuncs.h
  
  extern Datum bt_metap(PG_FUNCTION_ARGS);
  extern Datum bt_page_items(PG_FUNCTION_ARGS);
*** GetBTPageStatistics(BlockNumber blkno, B
*** 155,160 
--- 156,202 
  		stat-avg_item_size = 0;
  }
  
+ /*
+  * GetBTRelationFreeSpace
+  *
+  * Get the free space for a btree index.
+  * This is a helper function for relation_free_space()
+  *
+  */
+ float4
+ GetBTRelationFreeSpace(Relation rel)
+ {
+ 	BTPageStat stat;
+ 
+ 	Buffer		buffer;
+ 	BlockNumber blkno;
+ 	BlockNumber totalBlcksInRelation = RelationGetNumberOfBlocks(rel);
+ 	Size 		free_space = 0;
+ 	double		free_percent = 0;
+ 	
+ 	/* Skip page 0 because it is a metapage */
+ 	for (blkno = 1; blkno  totalBlcksInRelation; blkno++)
+ 	{
+ 		buffer = ReadBuffer(rel, blkno);
+ 		/* 
+ 		 * get the statistics of the indexes and use that info
+ 		 * to determine free space on the page
+ 		 */
+ 		GetBTPageStatistics(blkno, buffer, stat);
+ 		if (stat.type == 'd')
+ 			free_space += stat.page_size;
+ 		else
+ 			free_space += stat.free_size;		
+ 
+ 		ReleaseBuffer(buffer);
+ 	}
+ 
+ 	if (totalBlcksInRelation  1)
+ 		free_percent = (free_space * 1.0) / ((totalBlcksInRelation - 1) * BLCKSZ);
+ 	return free_percent;
+ }
+ 
+ 
  /* ---
   * bt_page()
   *
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index fa50655..c1d72ba 100644
*** a/contrib/pageinspect/heapfuncs.c
--- b/contrib/pageinspect/heapfuncs.c
***
*** 28,33 
--- 28,36 
  #include funcapi.h
  #include utils/builtins.h
  #include miscadmin.h
+ #include