Tom Lane escribió:
> Alvaro Herrera <[EMAIL PROTECTED]> writes:
> >> Given that the worst-case consequence is extra index vacuum passes,
> >> which don't hurt that much when a table is small, maybe some smaller
> >> estimate like 100 TIDs per page would be enough. Or, instead of
> >> using a hard-wired constant, look at pg_class.reltuples/relpages
> >> to estimate the average tuple density ...
>
> > This sounds like a reasonable compromise.
>
> Do you want to make it happen?
I'm not having much luck really. I think the problem is that ANALYZE
stores reltuples as the number of live tuples, so if you delete a big
portion of a big table, then ANALYZE and then VACUUM, there's a huge
misestimation and extra index cleanup passes happen, which is a bad
thing.
There seems to be no way to estimate the dead space, is there? We could
go to pgstats but that seems backwards.
I was having a problem at first with estimating for small tables which
had no valid info in pg_class.reltuples, but I worked around that by
using MaxHeapTuplesPerPage. (I was experimenting with the code that
estimates average tuple width in estimate_rel_size() but then figured it
was too much work.) So this part is fine AFAICS.
I attach the patch I am playing with, and the simple test I've been
examining (on which I comment the ANALYZE on some runs, change the
conditions on the DELETE, put the CREATE INDEX before insertion instead
of after it, etc).
--
Alvaro Herrera http://www.CommandPrompt.com/
The PostgreSQL Company - Command Prompt, Inc.
drop table if exists foo;
create table foo (a int, b varchar);
create index foo_idx on foo(a);
insert into foo select * from generate_series(1, 200000);
delete from foo where a % 2 = 0; -- or a % 3 = 0;
analyze foo;
vacuum verbose foo;
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.90
diff -c -p -r1.90 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c 30 May 2007 20:11:57 -0000 1.90
--- src/backend/commands/vacuumlazy.c 29 Aug 2007 18:36:18 -0000
*************** static int lazy_vacuum_page(Relation one
*** 120,126 ****
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
! static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
--- 121,128 ----
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
LVRelStats *vacrelstats);
! static void lazy_space_alloc(Relation onerel, LVRelStats *vacrelstats,
! BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
*************** lazy_scan_heap(Relation onerel, LVRelSta
*** 289,295 ****
vacrelstats->rel_pages = nblocks;
vacrelstats->nonempty_pages = 0;
! lazy_space_alloc(vacrelstats, nblocks);
for (blkno = 0; blkno < nblocks; blkno++)
{
--- 291,297 ----
vacrelstats->rel_pages = nblocks;
vacrelstats->nonempty_pages = 0;
! lazy_space_alloc(onerel, vacrelstats, nblocks);
for (blkno = 0; blkno < nblocks; blkno++)
{
*************** count_nondeletable_pages(Relation onerel
*** 964,979 ****
* See the comments at the head of this file for rationale.
*/
static void
! lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
{
long maxtuples;
int maxpages;
if (vacrelstats->hasindex)
{
! maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
maxtuples = Min(maxtuples, INT_MAX);
maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
/* stay sane if small maintenance_work_mem */
maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
}
--- 966,999 ----
* See the comments at the head of this file for rationale.
*/
static void
! lazy_space_alloc(Relation onerel, LVRelStats *vacrelstats, BlockNumber relblocks)
{
long maxtuples;
int maxpages;
if (vacrelstats->hasindex)
{
! BlockNumber relpages;
! double reltuples;
!
! /* coerce values in pg_class to more desirable types */
! relpages = (BlockNumber) onerel->rd_rel->relpages;
! reltuples = (double) onerel->rd_rel->reltuples;
!
! /*
! * If the relation has never been vacuumed, assume worst-case
! * number of tuples. Otherwise, use the density from pg_class to
! * estimate it.
! */
! if (relpages > 0)
! maxtuples = (long) (reltuples / (double) relpages * (double) relblocks);
! else
! maxtuples = MaxHeapTuplesPerPage * relblocks;
!
! maxtuples = Min(maxtuples, (maintenance_work_mem * 1024L) / sizeof(ItemPointerData));
maxtuples = Min(maxtuples, INT_MAX);
maxtuples = Min(maxtuples, MaxAllocSize / sizeof(ItemPointerData));
+
/* stay sane if small maintenance_work_mem */
maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
}
*************** lazy_space_alloc(LVRelStats *vacrelstats
*** 987,992 ****
--- 1007,1014 ----
vacrelstats->dead_tuples = (ItemPointer)
palloc(maxtuples * sizeof(ItemPointerData));
+ elog(NOTICE, "alloc'ed %ld bytes for %ld tuples", maxtuples * sizeof(ItemPointerData), maxtuples);
+
maxpages = MaxFSMPages;
maxpages = Min(maxpages, MaxAllocSize / sizeof(PageFreeSpaceInfo));
/* No need to allocate more pages than the relation has blocks */
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
subscribe-nomail command to [EMAIL PROTECTED] so that your
message can get through to the mailing list cleanly