There's been a thread in pgsql-performance about ANALYZE reporting an
overly large number of dead rows because it counts anything that's not
visible per SnapshotNow rules as "dead"; in particular,
insert-in-progress tuples are reported as "dead".  This seems like a
bad idea because it may prompt autovacuum to launch a useless vacuum.
The attached patch revises ANALYZE to distinguish tuple states the same
way VACUUM does, and not to count INSERT_IN_PROGRESS or
DELETE_IN_PROGRESS tuples as either "live" or "dead".
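
In essence, the per-tuple test changes from "is this visible under
SnapshotNow?" to a HeapTupleSatisfiesVacuum classification.  Boiled
down from the patch below (a condensed sketch, not a standalone
excerpt), the new per-tuple logic is:

    switch (HeapTupleSatisfiesVacuum(targtuple.t_data, OldestXmin, targbuffer))
    {
        case HEAPTUPLE_LIVE:
            liverows += 1;      /* eligible for the sample */
            break;
        case HEAPTUPLE_DEAD:
        case HEAPTUPLE_RECENTLY_DEAD:
            deadrows += 1;      /* still counted as dead */
            break;
        case HEAPTUPLE_INSERT_IN_PROGRESS:
        case HEAPTUPLE_DELETE_IN_PROGRESS:
            /* in-doubt: counted as neither live nor dead */
            break;
        default:
            elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
            break;
    }
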
Comments? In particular, should we report a count of in-doubt tuples in
the eventual LOG message, and does anyone want to argue for still
counting DELETE_IN_PROGRESS as "dead"?
regards, tom lane
*** src/backend/commands/analyze.c.orig Thu Nov 15 17:38:16 2007
--- src/backend/commands/analyze.c Fri Nov 16 17:45:10 2007
***************
*** 32,37 ****
--- 32,38 ----
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "storage/proc.h"
+ #include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
***************
*** 835,840 ****
--- 836,842 ----
double deadrows = 0; /* # dead rows seen */
double rowstoskip = -1; /* -1 means not set yet */
BlockNumber totalblocks;
+ TransactionId OldestXmin;
BlockSamplerData bs;
double rstate;
***************
*** 842,847 ****
--- 844,852 ----
totalblocks = RelationGetNumberOfBlocks(onerel);
+ /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
+ OldestXmin = GetOldestXmin(onerel->rd_rel->relisshared, true);
+
/* Prepare for sampling block numbers */
BlockSampler_Init(&bs, totalblocks, targrows);
/* Prepare for sampling rows */
***************
*** 862,917 ****
  		 * We must maintain a pin on the target page's buffer to ensure that
  		 * the maxoffset value stays good (else concurrent VACUUM might delete
  		 * tuples out from under us).  Hence, pin the page until we are done
! 		 * looking at it.  We don't maintain a lock on the page, so tuples
! 		 * could get added to it, but we ignore such tuples.
  		 */
  		targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
  		LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
  		targpage = BufferGetPage(targbuffer);
  		maxoffset = PageGetMaxOffsetNumber(targpage);
- 		LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);

  		/* Inner loop over all tuples on the selected page */
  		for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
  		{
  			HeapTupleData targtuple;

  			ItemPointerSet(&targtuple.t_self, targblock, targoffset);
! 			/* We use heap_release_fetch to avoid useless bufmgr traffic */
! 			if (heap_release_fetch(onerel, SnapshotNow,
! 								   &targtuple, &targbuffer,
! 								   true, NULL))
  			{
  				/*
  				 * The first targrows live rows are simply copied into the
  				 * reservoir. Then we start replacing tuples in the sample
! 				 * until we reach the end of the relation.  This algorithm is
! 				 * from Jeff Vitter's paper (see full citation below). It
! 				 * works by repeatedly computing the number of tuples to skip
! 				 * before selecting a tuple, which replaces a randomly chosen
! 				 * element of the reservoir (current set of tuples). At all
! 				 * times the reservoir is a true random sample of the tuples
! 				 * we've passed over so far, so when we fall off the end of
! 				 * the relation we're done.
  				 */
  				if (numrows < targrows)
  					rows[numrows++] = heap_copytuple(&targtuple);
  				else
  				{
  					/*
! 					 * t in Vitter's paper is the number of records already
! 					 * processed.  If we need to compute a new S value, we
! 					 * must use the not-yet-incremented value of liverows as
! 					 * t.
  					 */
  					if (rowstoskip < 0)
! 						rowstoskip = get_next_S(liverows, targrows, &rstate);
!
  					if (rowstoskip <= 0)
  					{
  						/*
! 						 * Found a suitable tuple, so save it, replacing one
! 						 * old tuple at random
  						 */
  						int			k = (int) (targrows * random_fract());
--- 867,935 ----
  		 * We must maintain a pin on the target page's buffer to ensure that
  		 * the maxoffset value stays good (else concurrent VACUUM might delete
  		 * tuples out from under us).  Hence, pin the page until we are done
! 		 * looking at it.  We also choose to hold sharelock on the buffer
! 		 * throughout --- we could release and re-acquire sharelock for
! 		 * each tuple, but since we aren't doing much work per tuple, the
! 		 * extra lock traffic is probably better avoided.
  		 */
  		targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
  		LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
  		targpage = BufferGetPage(targbuffer);
  		maxoffset = PageGetMaxOffsetNumber(targpage);

  		/* Inner loop over all tuples on the selected page */
  		for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)
  		{
+ 			ItemId		itemid;
  			HeapTupleData targtuple;

+ 			itemid = PageGetItemId(targpage, targoffset);
+
+ 			/* Ignore unused, redirect, and dead tuple slots */
+ 			if (!ItemIdIsNormal(itemid))
+ 				continue;
+
  			ItemPointerSet(&targtuple.t_self, targblock, targoffset);
!
! 			targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
! 			targtuple.t_len = ItemIdGetLength(itemid);
!
! 			switch (HeapTupleSatisfiesVacuum(targtuple.t_data,
! 											 OldestXmin,
! 											 targbuffer))
  			{
+ 				case HEAPTUPLE_LIVE:
+
  				/*
  				 * The first targrows live rows are simply copied into the
  				 * reservoir. Then we start replacing tuples in the sample
! 				 * until we reach the end of the relation.  This algorithm
! 				 * is from Jeff Vitter's paper (see full citation
! 				 * below). It works by repeatedly computing the number of
! 				 * tuples to skip before selecting a tuple, which replaces
! 				 * a randomly chosen element of the reservoir (current set
! 				 * of tuples). At all times the reservoir is a true
! 				 * random sample of the tuples we've passed over so far,
! 				 * so when we fall off the end of the relation we're done.
  				 */
  				if (numrows < targrows)
  					rows[numrows++] = heap_copytuple(&targtuple);
  				else
  				{
  					/*
! 					 * t in Vitter's paper is the number of records
! 					 * already processed.  If we need to compute a new S
! 					 * value, we must use the not-yet-incremented value of
! 					 * liverows as t.
  					 */
  					if (rowstoskip < 0)
! 						rowstoskip = get_next_S(liverows, targrows,
! 												&rstate);
  					if (rowstoskip <= 0)
  					{
  						/*
! 						 * Found a suitable tuple, so save it, replacing
! 						 * one old tuple at random
  						 */
  						int			k = (int) (targrows * random_fract());
***************
*** 924,939 ****
  				}

  				liverows += 1;
! 			}
! 			else
! 			{
! 				/* Count dead rows, but not empty slots */
! 				if (targtuple.t_data != NULL)
  					deadrows += 1;
  			}
  		}

! 		/* Now release the pin on the page */
  		ReleaseBuffer(targbuffer);
  	}
--- 942,968 ----
  				}

  				liverows += 1;
! 					break;
!
! 				case HEAPTUPLE_DEAD:
! 				case HEAPTUPLE_RECENTLY_DEAD:
! 					/* Count dead and recently-dead rows */
  					deadrows += 1;
+ 					break;
+
+ 				case HEAPTUPLE_INSERT_IN_PROGRESS:
+ 				case HEAPTUPLE_DELETE_IN_PROGRESS:
+ 					/* We do not currently count in-doubt rows */
+ 					break;
+
+ 				default:
+ 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ 					break;
  			}
  		}

! 		/* Now release the lock and pin on the page */
! 		LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);
  		ReleaseBuffer(targbuffer);
  	}
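
(Aside, for anyone not familiar with the sampling method the patch's
comments refer to: the reservoir invariant is easiest to see in the
basic "Algorithm R" form below.  This is only an illustration in
standalone C, not backend code --- analyze.c uses Vitter's skip-based
variant via get_next_S() so it doesn't need a random number per row,
but the resulting sample has the same property.)

    /*
     * Standalone illustration of reservoir sampling (Algorithm R).
     * After the loop, "reservoir" is a uniform random sample of the
     * rows seen so far, which is the invariant the ANALYZE code relies on.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    #define TARGROWS 5              /* size of the sample */
    #define NROWS    100            /* pretend this many live rows are scanned */

    int
    main(void)
    {
        int         reservoir[TARGROWS];
        int         nseen = 0;      /* rows examined so far ("t" in Vitter's paper) */
        int         row;

        srand((unsigned) time(NULL));

        for (row = 1; row <= NROWS; row++)
        {
            if (nseen < TARGROWS)
            {
                /* the first TARGROWS rows are simply copied into the reservoir */
                reservoir[nseen] = row;
            }
            else
            {
                /* replace a random slot with probability TARGROWS / (nseen + 1) */
                int         k = rand() % (nseen + 1);

                if (k < TARGROWS)
                    reservoir[k] = row;
            }
            nseen++;
        }

        for (row = 0; row < TARGROWS; row++)
            printf("sample[%d] = %d\n", row, reservoir[row]);

        return 0;
    }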