On Mon, Oct 13, 2008 at 8:30 AM, Tom Lane <[EMAIL PROTECTED]> wrote: > Heikki Linnakangas <[EMAIL PROTECTED]> writes: > > No, I was thinking of something along the lines of: > > INFO: clustering "public.my_c" > > INFO: complete, was 33%, now 100% clustered > > The only such measure that we have is the correlation, which isn't very > > good anyway, so I'm not sure if that's worthwhile. > > It'd be possible to count the number of order reversals during the > indexscan, ie the number of tuples with CTID lower than the previous > one's. But I'm not sure how useful that number really is. Also it's > not clear how to preserve such functionality if cluster is > re-implemented with a sort. > > regards, tom lane >
Another version of the patch should be attached, this time counting the number of "inversions" (pairs of tuples in the table that are in the wrong order) as a measure of the "sortedness" of the original data (scanned/live numbers still reported as an indication of the extent to which the table was vacuumed). N.B. -- I'm not familiar enough with Postgres internals to know if the included inversion_count() method is a valid way to identify inversions. In any case, example VERBOSE output: postgres=# CLUSTER public.my_c VERBOSE ; INFO: clustering "public.my_c" INFO: complete, 15 tuples scanned, 10 tuples now live, 2 inversions DETAIL: CPU 0.00s/0.00u sec elapsed 0.00 sec. CLUSTER
Index: src/backend/commands/cluster.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/commands/cluster.c,v retrieving revision 1.177 diff -c -r1.177 cluster.c *** src/backend/commands/cluster.c 19 Jun 2008 00:46:04 -0000 1.177 --- src/backend/commands/cluster.c 13 Oct 2008 14:25:09 -0000 *************** *** 46,51 **** --- 46,52 ---- #include "utils/snapmgr.h" #include "utils/syscache.h" #include "utils/tqual.h" + #include "utils/pg_rusage.h" /* *************** *** 59,70 **** Oid indexOid; } RelToCluster; ! ! static void cluster_rel(RelToCluster *rv, bool recheck); ! static void rebuild_relation(Relation OldHeap, Oid indexOid); ! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex); static List *get_tables_to_cluster(MemoryContext cluster_context); ! /*--------------------------------------------------------------------------- --- 60,80 ---- Oid indexOid; } RelToCluster; ! /* ! * This struct is used to collect stats for VERBOSE output. ! */ ! typedef struct ! { ! double tuples_scanned ; ! double tuples_copied ; ! double inversions ; ! } ClusterStatistics; ! ! static void cluster_rel(RelToCluster *rv, bool recheck, int elevel); ! static void rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs); ! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs); static List *get_tables_to_cluster(MemoryContext cluster_context); ! static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) ; /*--------------------------------------------------------------------------- *************** *** 176,182 **** heap_close(rel, NoLock); /* Do the job */ ! cluster_rel(&rvtc, false); } else { --- 186,192 ---- heap_close(rel, NoLock); /* Do the job */ ! cluster_rel(&rvtc, false, stmt->verbose ? INFO : DEBUG2); } else { *************** *** 225,231 **** StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); ! cluster_rel(rvtc, true); PopActiveSnapshot(); CommitTransactionCommand(); } --- 235,241 ---- StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); ! cluster_rel(rvtc, true, stmt->verbose ? INFO : DEBUG2); PopActiveSnapshot(); CommitTransactionCommand(); } *************** *** 253,261 **** * them incrementally while we load the table. */ static void ! cluster_rel(RelToCluster *rvtc, bool recheck) { Relation OldHeap; /* Check for user-requested abort. */ CHECK_FOR_INTERRUPTS(); --- 263,273 ---- * them incrementally while we load the table. */ static void ! cluster_rel(RelToCluster *rvtc, bool recheck, int elevel) { Relation OldHeap; + PGRUsage ru0 ; + ClusterStatistics cs ; /* Check for user-requested abort. */ CHECK_FOR_INTERRUPTS(); *************** *** 343,349 **** check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck); /* rebuild_relation does all the dirty work */ ! rebuild_relation(OldHeap, rvtc->indexOid); /* NB: rebuild_relation does heap_close() on OldHeap */ } --- 355,373 ---- check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck); /* rebuild_relation does all the dirty work */ ! ereport(elevel, ! (errmsg("clustering \"%s.%s\"", ! get_namespace_name(RelationGetNamespace(OldHeap)), ! RelationGetRelationName(OldHeap)))); ! pg_rusage_init(&ru0); ! cs.tuples_scanned = cs.tuples_copied = cs.inversions = 0 ; ! rebuild_relation(OldHeap, rvtc->indexOid, &cs); ! ereport(elevel, ! (errmsg("complete, %.0f tuples scanned, %.0f tuples now live, %.0f inversions", ! cs.tuples_scanned, ! cs.tuples_copied, ! cs.inversions), ! errdetail("%s.", pg_rusage_show(&ru0)))); /* NB: rebuild_relation does heap_close() on OldHeap */ } *************** *** 556,566 **** * * OldHeap: table to rebuild --- must be opened and exclusive-locked! * indexOid: index to cluster by * * NB: this routine closes OldHeap at the right time; caller should not. */ static void ! rebuild_relation(Relation OldHeap, Oid indexOid) { Oid tableOid = RelationGetRelid(OldHeap); Oid tableSpace = OldHeap->rd_rel->reltablespace; --- 580,591 ---- * * OldHeap: table to rebuild --- must be opened and exclusive-locked! * indexOid: index to cluster by + * cs: ClusterStatistics for VERBOSE option * * NB: this routine closes OldHeap at the right time; caller should not. */ static void ! rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs) { Oid tableOid = RelationGetRelid(OldHeap); Oid tableSpace = OldHeap->rd_rel->reltablespace; *************** *** 593,599 **** /* * Copy the heap data into the new table in the desired order. */ ! frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid); /* To make the new heap's data visible (probably not needed?). */ CommandCounterIncrement(); --- 618,624 ---- /* * Copy the heap data into the new table in the desired order. */ ! frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid, cs); /* To make the new heap's data visible (probably not needed?). */ CommandCounterIncrement(); *************** *** 703,709 **** * freeze cutoff point for the tuples. */ static TransactionId ! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex) { Relation NewHeap, OldHeap, --- 728,734 ---- * freeze cutoff point for the tuples. */ static TransactionId ! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs) { Relation NewHeap, OldHeap, *************** *** 719,724 **** --- 744,751 ---- TransactionId OldestXmin; TransactionId FreezeXid; RewriteState rwstate; + BlockIdData bid_old = { 0, 0 } ; + OffsetNumber off_old = 0 ; /* * Open the relations we need. *************** *** 784,789 **** --- 811,820 ---- CHECK_FOR_INTERRUPTS(); + if(cs) { + cs->tuples_scanned++ ; + } + /* Since we used no scan keys, should never need to recheck */ if (scan->xs_recheck) elog(ERROR, "CLUSTER does not support lossy index conditions"); *************** *** 856,861 **** --- 887,903 ---- * * So, we must reconstruct the tuple from component Datums. */ + /* ...keep track of inversions (pairs of tuples in the table that are in the wrong order) */ + if(cs->tuples_copied) { + cs->inversions += inversion_count( + &bid_old, + off_old, + &(tuple->t_data->t_ctid.ip_blkid), + tuple->t_data->t_ctid.ip_posid) ; + } + bid_old = tuple->t_data->t_ctid.ip_blkid ; + off_old = tuple->t_data->t_ctid.ip_posid ; + heap_deform_tuple(tuple, oldTupDesc, values, isnull); /* Be sure to null out any dropped columns */ *************** *** 875,880 **** --- 917,924 ---- rewrite_heap_tuple(rwstate, tuple, copiedTuple); heap_freetuple(copiedTuple); + + cs->tuples_copied++ ; } index_endscan(scan); *************** *** 1119,1121 **** --- 1163,1171 ---- return rvs; } + + static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) { + return bid_old->bi_hi > bid->bi_hi + || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo > bid->bi_lo) + || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo==bid->bi_lo && off_old > off) ; + } Index: src/backend/nodes/copyfuncs.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v retrieving revision 1.408 diff -c -r1.408 copyfuncs.c *** src/backend/nodes/copyfuncs.c 7 Oct 2008 19:27:04 -0000 1.408 --- src/backend/nodes/copyfuncs.c 13 Oct 2008 14:25:10 -0000 *************** *** 2259,2264 **** --- 2259,2265 ---- COPY_NODE_FIELD(relation); COPY_STRING_FIELD(indexname); + COPY_SCALAR_FIELD(verbose) ; return newnode; } Index: src/backend/nodes/equalfuncs.c =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v retrieving revision 1.333 diff -c -r1.333 equalfuncs.c *** src/backend/nodes/equalfuncs.c 6 Oct 2008 17:39:26 -0000 1.333 --- src/backend/nodes/equalfuncs.c 13 Oct 2008 14:25:10 -0000 *************** *** 1000,1005 **** --- 1000,1006 ---- { COMPARE_NODE_FIELD(relation); COMPARE_STRING_FIELD(indexname); + COMPARE_SCALAR_FIELD(verbose); return true; } Index: src/backend/parser/gram.y =================================================================== RCS file: /projects/cvsroot/pgsql/src/backend/parser/gram.y,v retrieving revision 2.625 diff -c -r2.625 gram.y *** src/backend/parser/gram.y 4 Oct 2008 21:56:54 -0000 2.625 --- src/backend/parser/gram.y 13 Oct 2008 14:25:10 -0000 *************** *** 5738,5770 **** /***************************************************************************** * * QUERY: ! * CLUSTER <qualified_name> [ USING <index_name> ] ! * CLUSTER ! * CLUSTER <index_name> ON <qualified_name> (for pre-8.3) * *****************************************************************************/ ClusterStmt: ! CLUSTER qualified_name cluster_index_specification { ClusterStmt *n = makeNode(ClusterStmt); n->relation = $2; n->indexname = $3; $$ = (Node*)n; } ! | CLUSTER { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; $$ = (Node*)n; } /* kept for pre-8.3 compatibility */ ! | CLUSTER index_name ON qualified_name { ClusterStmt *n = makeNode(ClusterStmt); n->relation = $4; n->indexname = $2; $$ = (Node*)n; } ; --- 5738,5773 ---- /***************************************************************************** * * QUERY: ! * CLUSTER <qualified_name> [ USING <index_name> ] [VERBOSE] ! * CLUSTER [VERBOSE] ! * CLUSTER <index_name> ON <qualified_name> [VERBOSE] (for pre-8.3) * *****************************************************************************/ ClusterStmt: ! CLUSTER qualified_name cluster_index_specification opt_verbose { ClusterStmt *n = makeNode(ClusterStmt); n->relation = $2; n->indexname = $3; + n->verbose = $4; $$ = (Node*)n; } ! | CLUSTER opt_verbose { ClusterStmt *n = makeNode(ClusterStmt); n->relation = NULL; n->indexname = NULL; + n->verbose = $2; $$ = (Node*)n; } /* kept for pre-8.3 compatibility */ ! | CLUSTER index_name ON qualified_name opt_verbose { ClusterStmt *n = makeNode(ClusterStmt); n->relation = $4; n->indexname = $2; + n->verbose = $5; $$ = (Node*)n; } ; Index: src/include/nodes/parsenodes.h =================================================================== RCS file: /projects/cvsroot/pgsql/src/include/nodes/parsenodes.h,v retrieving revision 1.376 diff -c -r1.376 parsenodes.h *** src/include/nodes/parsenodes.h 4 Oct 2008 21:56:55 -0000 1.376 --- src/include/nodes/parsenodes.h 13 Oct 2008 14:25:10 -0000 *************** *** 1940,1945 **** --- 1940,1946 ---- NodeTag type; RangeVar *relation; /* relation being indexed, or NULL if all */ char *indexname; /* original index defined */ + bool verbose; /* print progress info */ } ClusterStmt; /* ----------------------
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers