On Thu, 2006-01-26 at 00:33 +0000, Simon Riggs wrote:
> The enclosed patch substantially improves large sort performance, in the
> general case
> 
> cvstip:       elapsed 5693 sec, CPU 196 sec
> patch:        elapsed 4132 sec, CPU 90 sec
> 

Following patch implements matching sort cost calculations in the
planner in sort_cost()

This patch is in-addition to the previous patch.

Best Regards, Simon Riggs
Index: src/backend/optimizer/path/costsize.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v
retrieving revision 1.152
diff -c -r1.152 costsize.c
*** src/backend/optimizer/path/costsize.c	28 Dec 2005 01:29:59 -0000	1.152
--- src/backend/optimizer/path/costsize.c	31 Jan 2006 19:30:15 -0000
***************
*** 70,79 ****
  #include "utils/selfuncs.h"
  #include "utils/lsyscache.h"
  #include "utils/syscache.h"
! 
  
  #define LOG2(x)  (log(x) / 0.693147180559945)
- #define LOG6(x)  (log(x) / 1.79175946922805)
  
  /*
   * Some Paths return less than the nominal number of rows of their parent
--- 70,78 ----
  #include "utils/selfuncs.h"
  #include "utils/lsyscache.h"
  #include "utils/syscache.h"
! #include "utils/tuplesort.h"
  
  #define LOG2(x)  (log(x) / 0.693147180559945)
  
  /*
   * Some Paths return less than the nominal number of rows of their parent
***************
*** 767,779 ****
   * If the total volume exceeds work_mem, we switch to a tape-style merge
   * algorithm.  There will still be about t*log2(t) tuple comparisons in
   * total, but we will also need to write and read each tuple once per
!  * merge pass.	We expect about ceil(log6(r)) merge passes where r is the
!  * number of initial runs formed (log6 because tuplesort.c uses six-tape
!  * merging).  Since the average initial run should be about twice work_mem,
   * we have
!  *		disk traffic = 2 * relsize * ceil(log6(p / (2*work_mem)))
   *		cpu = comparison_cost * t * log2(t)
   *
   * The disk traffic is assumed to be half sequential and half random
   * accesses (XXX can't we refine that guess?)
   *
--- 766,783 ----
   * If the total volume exceeds work_mem, we switch to a tape-style merge
   * algorithm.  There will still be about t*log2(t) tuple comparisons in
   * total, but we will also need to write and read each tuple once per
!  * merge pass.	We expect about ceil(logT(r)) merge passes where r is the
!  * number of initial runs formed and logT means logarithm-in-base-T because
!  * tuplesort.c uses variable numbers of logical tapes (T) to perform merging.
!  * Since the average initial run should be about twice work_mem,
   * we have
!  *      num logical tapes = work_mem / OPTIMAL_MERGE_BUFFER_SIZE
!  *		disk traffic = 2 * relsize * ceil(logT(p / (2*work_mem)))
   *		cpu = comparison_cost * t * log2(t)
   *
+  * Fully sorted data will appear as only a single run, no matter how large,
+  * so this is an overestimate in that case, but a safe assumption
+  *
   * The disk traffic is assumed to be half sequential and half random
   * accesses (XXX can't we refine that guess?)
   *
***************
*** 824,834 ****
  	{
  		double		npages = ceil(nbytes / BLCKSZ);
  		double		nruns = (nbytes / work_mem_bytes) * 0.5;
! 		double		log_runs = ceil(LOG6(nruns));
  		double		npageaccesses;
  
! 		if (log_runs < 1.0)
! 			log_runs = 1.0;
  		npageaccesses = 2.0 * npages * log_runs;
  		/* Assume half are sequential (cost 1), half are not */
  		startup_cost += npageaccesses *
--- 828,847 ----
  	{
  		double		npages = ceil(nbytes / BLCKSZ);
  		double		nruns = (nbytes / work_mem_bytes) * 0.5;
!         double      ntapes = (work_mem_bytes / OPTIMAL_MERGE_BUFFER_SIZE);
! 		double		log_runs;
  		double		npageaccesses;
  
!         /* 
!          * The number of runs will vary in proportion to the logT
!          * We use the invariant scaling property of logs to calculate
!          * the log in a variable base
!          */
!         if (ntapes < nruns)
!             log_runs = ceil (log(nruns) / log(ntapes));
!         else
!             log_runs = 1.0;
! 
  		npageaccesses = 2.0 * npages * log_runs;
  		/* Assume half are sequential (cost 1), half are not */
  		startup_cost += npageaccesses *
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to