On Mon, Oct 13, 2008 at 8:30 AM, Tom Lane <[EMAIL PROTECTED]> wrote:
> Heikki Linnakangas <[EMAIL PROTECTED]> writes:
> > No, I was thinking of something along the lines of:
> > INFO: clustering "public.my_c"
> > INFO: complete, was 33%, now 100% clustered
> > The only such measure that we have is the correlation, which isn't very
> > good anyway, so I'm not sure if that's worthwhile.
>
> It'd be possible to count the number of order reversals during the
> indexscan, ie the number of tuples with CTID lower than the previous
> one's. But I'm not sure how useful that number really is. Also it's
> not clear how to preserve such functionality if cluster is
> re-implemented with a sort.
>
> regards, tom lane
>
Another version of the patch should be attached, this time counting the
number of "inversions" (pairs of tuples in the table that are in the wrong
order) as a measure of the "sortedness" of the original data (scanned/live
numbers still reported as an indication of the extent to which the table was
vacuumed).
N.B. -- I'm not familiar enough with Postgres internals to know if the
included inversion_count() method is a valid way to identify inversions.
In any case, example VERBOSE output:
postgres=# CLUSTER public.my_c VERBOSE ;
INFO: clustering "public.my_c"
INFO: complete, 15 tuples scanned, 10 tuples now live, 2 inversions
DETAIL: CPU 0.00s/0.00u sec elapsed 0.00 sec.
CLUSTER
Index: src/backend/commands/cluster.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/cluster.c,v
retrieving revision 1.177
diff -c -r1.177 cluster.c
*** src/backend/commands/cluster.c 19 Jun 2008 00:46:04 -0000 1.177
--- src/backend/commands/cluster.c 13 Oct 2008 14:25:09 -0000
***************
*** 46,51 ****
--- 46,52 ----
#include "utils/snapmgr.h"
#include "utils/syscache.h"
#include "utils/tqual.h"
+ #include "utils/pg_rusage.h"
/*
***************
*** 59,70 ****
Oid indexOid;
} RelToCluster;
!
! static void cluster_rel(RelToCluster *rv, bool recheck);
! static void rebuild_relation(Relation OldHeap, Oid indexOid);
! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
static List *get_tables_to_cluster(MemoryContext cluster_context);
!
/*---------------------------------------------------------------------------
--- 60,80 ----
Oid indexOid;
} RelToCluster;
! /*
! * This struct is used to collect stats for VERBOSE output.
! */
! typedef struct
! {
! double tuples_scanned ;
! double tuples_copied ;
! double inversions ;
! } ClusterStatistics;
!
! static void cluster_rel(RelToCluster *rv, bool recheck, int elevel);
! static void rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs);
! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs);
static List *get_tables_to_cluster(MemoryContext cluster_context);
! static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) ;
/*---------------------------------------------------------------------------
***************
*** 176,182 ****
heap_close(rel, NoLock);
/* Do the job */
! cluster_rel(&rvtc, false);
}
else
{
--- 186,192 ----
heap_close(rel, NoLock);
/* Do the job */
! cluster_rel(&rvtc, false, stmt->verbose ? INFO : DEBUG2);
}
else
{
***************
*** 225,231 ****
StartTransactionCommand();
/* functions in indexes may want a snapshot set */
PushActiveSnapshot(GetTransactionSnapshot());
! cluster_rel(rvtc, true);
PopActiveSnapshot();
CommitTransactionCommand();
}
--- 235,241 ----
StartTransactionCommand();
/* functions in indexes may want a snapshot set */
PushActiveSnapshot(GetTransactionSnapshot());
! cluster_rel(rvtc, true, stmt->verbose ? INFO : DEBUG2);
PopActiveSnapshot();
CommitTransactionCommand();
}
***************
*** 253,261 ****
* them incrementally while we load the table.
*/
static void
! cluster_rel(RelToCluster *rvtc, bool recheck)
{
Relation OldHeap;
/* Check for user-requested abort. */
CHECK_FOR_INTERRUPTS();
--- 263,273 ----
* them incrementally while we load the table.
*/
static void
! cluster_rel(RelToCluster *rvtc, bool recheck, int elevel)
{
Relation OldHeap;
+ PGRUsage ru0 ;
+ ClusterStatistics cs ;
/* Check for user-requested abort. */
CHECK_FOR_INTERRUPTS();
***************
*** 343,349 ****
check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck);
/* rebuild_relation does all the dirty work */
! rebuild_relation(OldHeap, rvtc->indexOid);
/* NB: rebuild_relation does heap_close() on OldHeap */
}
--- 355,373 ----
check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck);
/* rebuild_relation does all the dirty work */
! ereport(elevel,
! (errmsg("clustering \"%s.%s\"",
! get_namespace_name(RelationGetNamespace(OldHeap)),
! RelationGetRelationName(OldHeap))));
! pg_rusage_init(&ru0);
! cs.tuples_scanned = cs.tuples_copied = cs.inversions = 0 ;
! rebuild_relation(OldHeap, rvtc->indexOid, &cs);
! ereport(elevel,
! (errmsg("complete, %.0f tuples scanned, %.0f tuples now live, %.0f inversions",
! cs.tuples_scanned,
! cs.tuples_copied,
! cs.inversions),
! errdetail("%s.", pg_rusage_show(&ru0))));
/* NB: rebuild_relation does heap_close() on OldHeap */
}
***************
*** 556,566 ****
*
* OldHeap: table to rebuild --- must be opened and exclusive-locked!
* indexOid: index to cluster by
*
* NB: this routine closes OldHeap at the right time; caller should not.
*/
static void
! rebuild_relation(Relation OldHeap, Oid indexOid)
{
Oid tableOid = RelationGetRelid(OldHeap);
Oid tableSpace = OldHeap->rd_rel->reltablespace;
--- 580,591 ----
*
* OldHeap: table to rebuild --- must be opened and exclusive-locked!
* indexOid: index to cluster by
+ * cs: ClusterStatistics for VERBOSE option
*
* NB: this routine closes OldHeap at the right time; caller should not.
*/
static void
! rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs)
{
Oid tableOid = RelationGetRelid(OldHeap);
Oid tableSpace = OldHeap->rd_rel->reltablespace;
***************
*** 593,599 ****
/*
* Copy the heap data into the new table in the desired order.
*/
! frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid);
/* To make the new heap's data visible (probably not needed?). */
CommandCounterIncrement();
--- 618,624 ----
/*
* Copy the heap data into the new table in the desired order.
*/
! frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid, cs);
/* To make the new heap's data visible (probably not needed?). */
CommandCounterIncrement();
***************
*** 703,709 ****
* freeze cutoff point for the tuples.
*/
static TransactionId
! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
{
Relation NewHeap,
OldHeap,
--- 728,734 ----
* freeze cutoff point for the tuples.
*/
static TransactionId
! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs)
{
Relation NewHeap,
OldHeap,
***************
*** 719,724 ****
--- 744,751 ----
TransactionId OldestXmin;
TransactionId FreezeXid;
RewriteState rwstate;
+ BlockIdData bid_old = { 0, 0 } ;
+ OffsetNumber off_old = 0 ;
/*
* Open the relations we need.
***************
*** 784,789 ****
--- 811,820 ----
CHECK_FOR_INTERRUPTS();
+ if(cs) {
+ cs->tuples_scanned++ ;
+ }
+
/* Since we used no scan keys, should never need to recheck */
if (scan->xs_recheck)
elog(ERROR, "CLUSTER does not support lossy index conditions");
***************
*** 856,861 ****
--- 887,903 ----
*
* So, we must reconstruct the tuple from component Datums.
*/
+ /* ...keep track of inversions (pairs of tuples in the table that are in the wrong order) */
+ if(cs->tuples_copied) {
+ cs->inversions += inversion_count(
+ &bid_old,
+ off_old,
+ &(tuple->t_data->t_ctid.ip_blkid),
+ tuple->t_data->t_ctid.ip_posid) ;
+ }
+ bid_old = tuple->t_data->t_ctid.ip_blkid ;
+ off_old = tuple->t_data->t_ctid.ip_posid ;
+
heap_deform_tuple(tuple, oldTupDesc, values, isnull);
/* Be sure to null out any dropped columns */
***************
*** 875,880 ****
--- 917,924 ----
rewrite_heap_tuple(rwstate, tuple, copiedTuple);
heap_freetuple(copiedTuple);
+
+ cs->tuples_copied++ ;
}
index_endscan(scan);
***************
*** 1119,1121 ****
--- 1163,1171 ----
return rvs;
}
+
+ static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) {
+ return bid_old->bi_hi > bid->bi_hi
+ || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo > bid->bi_lo)
+ || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo==bid->bi_lo && off_old > off) ;
+ }
Index: src/backend/nodes/copyfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v
retrieving revision 1.408
diff -c -r1.408 copyfuncs.c
*** src/backend/nodes/copyfuncs.c 7 Oct 2008 19:27:04 -0000 1.408
--- src/backend/nodes/copyfuncs.c 13 Oct 2008 14:25:10 -0000
***************
*** 2259,2264 ****
--- 2259,2265 ----
COPY_NODE_FIELD(relation);
COPY_STRING_FIELD(indexname);
+ COPY_SCALAR_FIELD(verbose) ;
return newnode;
}
Index: src/backend/nodes/equalfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v
retrieving revision 1.333
diff -c -r1.333 equalfuncs.c
*** src/backend/nodes/equalfuncs.c 6 Oct 2008 17:39:26 -0000 1.333
--- src/backend/nodes/equalfuncs.c 13 Oct 2008 14:25:10 -0000
***************
*** 1000,1005 ****
--- 1000,1006 ----
{
COMPARE_NODE_FIELD(relation);
COMPARE_STRING_FIELD(indexname);
+ COMPARE_SCALAR_FIELD(verbose);
return true;
}
Index: src/backend/parser/gram.y
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/gram.y,v
retrieving revision 2.625
diff -c -r2.625 gram.y
*** src/backend/parser/gram.y 4 Oct 2008 21:56:54 -0000 2.625
--- src/backend/parser/gram.y 13 Oct 2008 14:25:10 -0000
***************
*** 5738,5770 ****
/*****************************************************************************
*
* QUERY:
! * CLUSTER <qualified_name> [ USING <index_name> ]
! * CLUSTER
! * CLUSTER <index_name> ON <qualified_name> (for pre-8.3)
*
*****************************************************************************/
ClusterStmt:
! CLUSTER qualified_name cluster_index_specification
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = $2;
n->indexname = $3;
$$ = (Node*)n;
}
! | CLUSTER
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = NULL;
n->indexname = NULL;
$$ = (Node*)n;
}
/* kept for pre-8.3 compatibility */
! | CLUSTER index_name ON qualified_name
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = $4;
n->indexname = $2;
$$ = (Node*)n;
}
;
--- 5738,5773 ----
/*****************************************************************************
*
* QUERY:
! * CLUSTER <qualified_name> [ USING <index_name> ] [VERBOSE]
! * CLUSTER [VERBOSE]
! * CLUSTER <index_name> ON <qualified_name> [VERBOSE] (for pre-8.3)
*
*****************************************************************************/
ClusterStmt:
! CLUSTER qualified_name cluster_index_specification opt_verbose
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = $2;
n->indexname = $3;
+ n->verbose = $4;
$$ = (Node*)n;
}
! | CLUSTER opt_verbose
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = NULL;
n->indexname = NULL;
+ n->verbose = $2;
$$ = (Node*)n;
}
/* kept for pre-8.3 compatibility */
! | CLUSTER index_name ON qualified_name opt_verbose
{
ClusterStmt *n = makeNode(ClusterStmt);
n->relation = $4;
n->indexname = $2;
+ n->verbose = $5;
$$ = (Node*)n;
}
;
Index: src/include/nodes/parsenodes.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/nodes/parsenodes.h,v
retrieving revision 1.376
diff -c -r1.376 parsenodes.h
*** src/include/nodes/parsenodes.h 4 Oct 2008 21:56:55 -0000 1.376
--- src/include/nodes/parsenodes.h 13 Oct 2008 14:25:10 -0000
***************
*** 1940,1945 ****
--- 1940,1946 ----
NodeTag type;
RangeVar *relation; /* relation being indexed, or NULL if all */
char *indexname; /* original index defined */
+ bool verbose; /* print progress info */
} ClusterStmt;
/* ----------------------
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers