On Mon, Oct 13, 2008 at 8:30 AM, Tom Lane <[EMAIL PROTECTED]> wrote:

> Heikki Linnakangas <[EMAIL PROTECTED]> writes:
> > No, I was thinking of something along the lines of:
> > INFO:  clustering "public.my_c"
> > INFO:  complete, was 33%, now 100% clustered
> > The only such measure that we have is the correlation, which isn't very
> > good anyway, so I'm not sure if that's worthwhile.
>
> It'd be possible to count the number of order reversals during the
> indexscan, ie the number of tuples with CTID lower than the previous
> one's.  But I'm not sure how useful that number really is.  Also it's
> not clear how to preserve such functionality if cluster is
> re-implemented with a sort.
>
>                        regards, tom lane
>

Another version of the patch should be attached, this time counting the
number of "inversions" (pairs of tuples in the table that are in the wrong
order) as a measure of the "sortedness" of the original data (scanned/live
numbers still reported as an indication of the extent to which the table was
vacuumed).

N.B. -- I'm not familiar enough with Postgres internals to know if the
included inversion_count() method is a valid way to identify inversions.

In any case, example VERBOSE output:

postgres=# CLUSTER public.my_c VERBOSE ;
INFO:  clustering "public.my_c"
INFO:  complete, 15 tuples scanned, 10 tuples now live, 2 inversions
DETAIL:  CPU 0.00s/0.00u sec elapsed 0.00 sec.
CLUSTER
Index: src/backend/commands/cluster.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/commands/cluster.c,v
retrieving revision 1.177
diff -c -r1.177 cluster.c
*** src/backend/commands/cluster.c	19 Jun 2008 00:46:04 -0000	1.177
--- src/backend/commands/cluster.c	13 Oct 2008 14:25:09 -0000
***************
*** 46,51 ****
--- 46,52 ----
  #include "utils/snapmgr.h"
  #include "utils/syscache.h"
  #include "utils/tqual.h"
+ #include "utils/pg_rusage.h"
  
  
  /*
***************
*** 59,70 ****
  	Oid			indexOid;
  } RelToCluster;
  
! 
! static void cluster_rel(RelToCluster *rv, bool recheck);
! static void rebuild_relation(Relation OldHeap, Oid indexOid);
! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex);
  static List *get_tables_to_cluster(MemoryContext cluster_context);
! 
  
  
  /*---------------------------------------------------------------------------
--- 60,80 ----
  	Oid			indexOid;
  } RelToCluster;
  
! /*
!  * This struct is used to collect stats for VERBOSE output.
!  */
! typedef struct
! {
! 	double	tuples_scanned ;
! 	double	tuples_copied ;
! 	double	inversions ;
! } ClusterStatistics;
! 
! static void cluster_rel(RelToCluster *rv, bool recheck, int elevel);
! static void rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs);
! static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs);
  static List *get_tables_to_cluster(MemoryContext cluster_context);
! static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) ;
  
  
  /*---------------------------------------------------------------------------
***************
*** 176,182 ****
  		heap_close(rel, NoLock);
  
  		/* Do the job */
! 		cluster_rel(&rvtc, false);
  	}
  	else
  	{
--- 186,192 ----
  		heap_close(rel, NoLock);
  
  		/* Do the job */
! 		cluster_rel(&rvtc, false, stmt->verbose ? INFO : DEBUG2);
  	}
  	else
  	{
***************
*** 225,231 ****
  			StartTransactionCommand();
  			/* functions in indexes may want a snapshot set */
  			PushActiveSnapshot(GetTransactionSnapshot());
! 			cluster_rel(rvtc, true);
  			PopActiveSnapshot();
  			CommitTransactionCommand();
  		}
--- 235,241 ----
  			StartTransactionCommand();
  			/* functions in indexes may want a snapshot set */
  			PushActiveSnapshot(GetTransactionSnapshot());
! 			cluster_rel(rvtc, true, stmt->verbose ? INFO : DEBUG2);
  			PopActiveSnapshot();
  			CommitTransactionCommand();
  		}
***************
*** 253,261 ****
   * them incrementally while we load the table.
   */
  static void
! cluster_rel(RelToCluster *rvtc, bool recheck)
  {
  	Relation	OldHeap;
  
  	/* Check for user-requested abort. */
  	CHECK_FOR_INTERRUPTS();
--- 263,273 ----
   * them incrementally while we load the table.
   */
  static void
! cluster_rel(RelToCluster *rvtc, bool recheck, int elevel)
  {
  	Relation	OldHeap;
+ 	PGRUsage	ru0 ;
+ 	ClusterStatistics	cs ;
  
  	/* Check for user-requested abort. */
  	CHECK_FOR_INTERRUPTS();
***************
*** 343,349 ****
  	check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck);
  
  	/* rebuild_relation does all the dirty work */
! 	rebuild_relation(OldHeap, rvtc->indexOid);
  
  	/* NB: rebuild_relation does heap_close() on OldHeap */
  }
--- 355,373 ----
  	check_index_is_clusterable(OldHeap, rvtc->indexOid, recheck);
  
  	/* rebuild_relation does all the dirty work */
! 	ereport(elevel,
! 			(errmsg("clustering \"%s.%s\"",
! 					get_namespace_name(RelationGetNamespace(OldHeap)),
! 					RelationGetRelationName(OldHeap))));
! 	pg_rusage_init(&ru0);
! 	cs.tuples_scanned = cs.tuples_copied = cs.inversions = 0 ;
! 	rebuild_relation(OldHeap, rvtc->indexOid, &cs);
! 	ereport(elevel,
! 			(errmsg("complete, %.0f tuples scanned, %.0f tuples now live, %.0f inversions",
! 				cs.tuples_scanned,
! 				cs.tuples_copied,
! 				cs.inversions),
!                          errdetail("%s.", pg_rusage_show(&ru0))));
  
  	/* NB: rebuild_relation does heap_close() on OldHeap */
  }
***************
*** 556,566 ****
   *
   * OldHeap: table to rebuild --- must be opened and exclusive-locked!
   * indexOid: index to cluster by
   *
   * NB: this routine closes OldHeap at the right time; caller should not.
   */
  static void
! rebuild_relation(Relation OldHeap, Oid indexOid)
  {
  	Oid			tableOid = RelationGetRelid(OldHeap);
  	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
--- 580,591 ----
   *
   * OldHeap: table to rebuild --- must be opened and exclusive-locked!
   * indexOid: index to cluster by
+  * cs: ClusterStatistics for VERBOSE option
   *
   * NB: this routine closes OldHeap at the right time; caller should not.
   */
  static void
! rebuild_relation(Relation OldHeap, Oid indexOid, ClusterStatistics *cs)
  {
  	Oid			tableOid = RelationGetRelid(OldHeap);
  	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
***************
*** 593,599 ****
  	/*
  	 * Copy the heap data into the new table in the desired order.
  	 */
! 	frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid);
  
  	/* To make the new heap's data visible (probably not needed?). */
  	CommandCounterIncrement();
--- 618,624 ----
  	/*
  	 * Copy the heap data into the new table in the desired order.
  	 */
! 	frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid, cs);
  
  	/* To make the new heap's data visible (probably not needed?). */
  	CommandCounterIncrement();
***************
*** 703,709 ****
   * freeze cutoff point for the tuples.
   */
  static TransactionId
! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
  {
  	Relation	NewHeap,
  				OldHeap,
--- 728,734 ----
   * freeze cutoff point for the tuples.
   */
  static TransactionId
! copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ClusterStatistics *cs)
  {
  	Relation	NewHeap,
  				OldHeap,
***************
*** 719,724 ****
--- 744,751 ----
  	TransactionId OldestXmin;
  	TransactionId FreezeXid;
  	RewriteState rwstate;
+ 	BlockIdData bid_old = { 0, 0 } ;
+ 	OffsetNumber off_old = 0 ;
  
  	/*
  	 * Open the relations we need.
***************
*** 784,789 ****
--- 811,820 ----
  
  		CHECK_FOR_INTERRUPTS();
  
+ 		if(cs) {
+ 			cs->tuples_scanned++ ;
+ 		}
+ 
  		/* Since we used no scan keys, should never need to recheck */
  		if (scan->xs_recheck)
  			elog(ERROR, "CLUSTER does not support lossy index conditions");
***************
*** 856,861 ****
--- 887,903 ----
  		 *
  		 * So, we must reconstruct the tuple from component Datums.
  		 */
+ 		/* ...keep track of inversions (pairs of tuples in the table that are in the wrong order) */
+ 		if(cs->tuples_copied) {
+ 			cs->inversions += inversion_count(
+ 						&bid_old,
+ 						off_old,
+ 						&(tuple->t_data->t_ctid.ip_blkid),
+ 						tuple->t_data->t_ctid.ip_posid) ;
+ 		}
+ 		bid_old = tuple->t_data->t_ctid.ip_blkid ;
+ 		off_old = tuple->t_data->t_ctid.ip_posid ;
+ 
  		heap_deform_tuple(tuple, oldTupDesc, values, isnull);
  
  		/* Be sure to null out any dropped columns */
***************
*** 875,880 ****
--- 917,924 ----
  		rewrite_heap_tuple(rwstate, tuple, copiedTuple);
  
  		heap_freetuple(copiedTuple);
+ 
+ 		cs->tuples_copied++ ;
  	}
  
  	index_endscan(scan);
***************
*** 1119,1121 ****
--- 1163,1171 ----
  
  	return rvs;
  }
+ 
+ static int inversion_count(BlockIdData *bid_old, OffsetNumber off_old, BlockIdData *bid, OffsetNumber off) {
+   return    bid_old->bi_hi > bid->bi_hi
+          || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo > bid->bi_lo)
+          || (bid_old->bi_hi==bid->bi_hi && bid_old->bi_lo==bid->bi_lo && off_old > off) ;
+ }
Index: src/backend/nodes/copyfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v
retrieving revision 1.408
diff -c -r1.408 copyfuncs.c
*** src/backend/nodes/copyfuncs.c	7 Oct 2008 19:27:04 -0000	1.408
--- src/backend/nodes/copyfuncs.c	13 Oct 2008 14:25:10 -0000
***************
*** 2259,2264 ****
--- 2259,2265 ----
  
  	COPY_NODE_FIELD(relation);
  	COPY_STRING_FIELD(indexname);
+ 	COPY_SCALAR_FIELD(verbose) ;
  
  	return newnode;
  }
Index: src/backend/nodes/equalfuncs.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v
retrieving revision 1.333
diff -c -r1.333 equalfuncs.c
*** src/backend/nodes/equalfuncs.c	6 Oct 2008 17:39:26 -0000	1.333
--- src/backend/nodes/equalfuncs.c	13 Oct 2008 14:25:10 -0000
***************
*** 1000,1005 ****
--- 1000,1006 ----
  {
  	COMPARE_NODE_FIELD(relation);
  	COMPARE_STRING_FIELD(indexname);
+ 	COMPARE_SCALAR_FIELD(verbose);
  
  	return true;
  }
Index: src/backend/parser/gram.y
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/parser/gram.y,v
retrieving revision 2.625
diff -c -r2.625 gram.y
*** src/backend/parser/gram.y	4 Oct 2008 21:56:54 -0000	2.625
--- src/backend/parser/gram.y	13 Oct 2008 14:25:10 -0000
***************
*** 5738,5770 ****
  /*****************************************************************************
   *
   *		QUERY:
!  *				CLUSTER <qualified_name> [ USING <index_name> ]
!  *				CLUSTER
!  *				CLUSTER <index_name> ON <qualified_name> (for pre-8.3)
   *
   *****************************************************************************/
  
  ClusterStmt:
! 			CLUSTER qualified_name cluster_index_specification
  				{
  			       ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = $2;
  				   n->indexname = $3;
  				   $$ = (Node*)n;
  				}
! 			| CLUSTER
  			    {
  				   ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = NULL;
  				   n->indexname = NULL;
  				   $$ = (Node*)n;
  				}
  			/* kept for pre-8.3 compatibility */
! 			| CLUSTER index_name ON qualified_name
  				{
  				   ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = $4;
  				   n->indexname = $2;
  				   $$ = (Node*)n;
  				}
  		;
--- 5738,5773 ----
  /*****************************************************************************
   *
   *		QUERY:
!  *				CLUSTER <qualified_name> [ USING <index_name> ] [VERBOSE]
!  *				CLUSTER [VERBOSE]
!  *				CLUSTER <index_name> ON <qualified_name> [VERBOSE] (for pre-8.3)
   *
   *****************************************************************************/
  
  ClusterStmt:
! 			CLUSTER qualified_name cluster_index_specification opt_verbose
  				{
  			       ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = $2;
  				   n->indexname = $3;
+ 				   n->verbose = $4;
  				   $$ = (Node*)n;
  				}
! 			| CLUSTER opt_verbose
  			    {
  				   ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = NULL;
  				   n->indexname = NULL;
+ 				   n->verbose = $2;
  				   $$ = (Node*)n;
  				}
  			/* kept for pre-8.3 compatibility */
! 			| CLUSTER index_name ON qualified_name opt_verbose
  				{
  				   ClusterStmt *n = makeNode(ClusterStmt);
  				   n->relation = $4;
  				   n->indexname = $2;
+ 				   n->verbose = $5;
  				   $$ = (Node*)n;
  				}
  		;
Index: src/include/nodes/parsenodes.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/nodes/parsenodes.h,v
retrieving revision 1.376
diff -c -r1.376 parsenodes.h
*** src/include/nodes/parsenodes.h	4 Oct 2008 21:56:55 -0000	1.376
--- src/include/nodes/parsenodes.h	13 Oct 2008 14:25:10 -0000
***************
*** 1940,1945 ****
--- 1940,1946 ----
  	NodeTag		type;
  	RangeVar   *relation;		/* relation being indexed, or NULL if all */
  	char	   *indexname;		/* original index defined */
+ 	bool		verbose;		/* print progress info */
  } ClusterStmt;
  
  /* ----------------------
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to