Hello All,

We have prepared a patch (against CVS HEAD) for the TODO item:
* Hash
  -During index creation, pre-sort the tuples to improve build speed
     http://archives.postgresql.org/pgsql-hackers/2007-03/msg01199.php

Details of this patch's performance improvements can be found at http://web.cecs.pdx.edu/~raneyt/pg/.

For example, in one 12 million tuple example, the 8.2.4 hash index
build required over 2.8 hours while this patch's build required 80
seconds. Hash build times are now comparable to BTree build times,
for example, for a 60 million tuple example the patched hash index
build time is 8.1 minutes and the BTree build time is 4.6 minutes.

We used spool functions from the BTree code to sort the index
tuples. Sorting is done on the hash value of the tuples.  The hash
value depends on the number of primary bucket pages (henceforth
just bucket pages) that will be required to fit all the index
tuples. So, before sorting, the base relation is scanned to get
the total number of tuples. Then, the fill factor (either default
or user defined, as applicable) is used to get the estimate of the
number of bucket pages.

The 8.2.4 code builds the index by inserting one tuple at a time
and splitting buckets as needed.  We pre-allocate the estimated
number of bucket pages all at once. This avoids bucket page splits
and thus redistribution of index tuples between the bucket page
that caused the split and the newly created bucket page.

We changed the values of low_mask, high_mask and max_bucket, used
by hashkey2bucket () while inserting index tuples to bucket pages.
They are set as follows:
Low_mask = (power of 2 value-1) less than the estimate.
High_mask = (power of 2 value-1) greater than the estimate.
Max_bucket = estimated number of bucket -1 (because bucket numbers
start from 0).

(Please note: hashsort.c is a new file and resides in src/backend/access/hash/)

We have also attached the simple data generator we used in the
tests.  Our SQL statements were as follows:

DROP TABLE IF EXISTS test;

CREATE TABLE test (
  value      INTEGER
);

COPY test FROM 'data';
\timing
CREATE INDEX hash ON test USING HASH(value);

Regards,
Shreya Bhargava <[EMAIL PROTECTED]>
Tom Raney <[EMAIL PROTECTED]>

/*---------------------------------------------------------------------------
 * hashsort.c
 *-------------------------------------------------------------------------*/

/* 
 * Functions needed to support initializing and sorting tuples in the spool
 * in hash.c
 */


#include "postgres.h"

#include "access/hash.h"
#include "miscadmin.h"
#include "storage/smgr.h"
#include "utils/tuplesort.h"

/*
 * Working state for a hash-index build spool.  Declared opaquely in
 * access/hash.h; only the functions in this file look inside it.
 */
struct HSpool
{
        /* State data for tuplesort.c */
        Tuplesortstate *sortstate; 
        Relation index;         /* the hash index being built */
};

/*
 * h_spoolinit() -- allocate and initialize a spool for a hash index build.
 *
 * The caller owns the returned spool and must release it with
 * h_spooldestroy() when the build is finished.
 */
HSpool *h_spoolinit(Relation index)
{
        HSpool *spool = (HSpool *) palloc0(sizeof(HSpool));
        int     sort_mem_kb;

        spool->index = index;

        /* Budget (in kilobytes) that the underlying tuplesort may use. */
        sort_mem_kb = maintenance_work_mem;

        /* Non-unique index sort; no random access to the result is needed. */
        spool->sortstate = tuplesort_begin_index(index, false, sort_mem_kb, false);

        /*
         * Replace the default index-tuple comparator with the hash-specific
         * one, so tuples are ordered by the (masked) hash of their key.
         */
        tuplesort_set_hashindex(spool->sortstate);

        return spool;
}

/* Add one index tuple to the spool; it is sorted later by h_do_sort(). */
void h_spool(IndexTuple itup, HSpool *hspool)
{
        tuplesort_putindextuple(hspool->sortstate, itup);
}

/*
 * h_bkt_num() -- estimate the number of primary bucket pages the finished
 * hash index will need to hold 'tuples' index tuples.
 */
uint32 h_bkt_num(uint32 tuples, Relation rel)
{
        TupleDesc       tupdesc = RelationGetDescr(rel);
        int32           key_width;
        int32           itup_footprint;
        int32           tuples_per_page;

        /*
         * Derive the fill factor locally.  The meta page would normally hold
         * it, but the meta page has not been created when this runs.
         */
        key_width = get_typavgwidth(tupdesc->attrs[0]->atttypid,
                                    tupdesc->attrs[0]->atttypmod);

        /* Space one index tuple occupies on a page, including its line pointer. */
        itup_footprint = MAXALIGN(sizeof(IndexTupleData)) +
                         MAXALIGN(key_width) +
                         sizeof(ItemIdData);

        tuples_per_page = RelationGetTargetPageUsage(rel, HASH_DEFAULT_FILLFACTOR) /
                          itup_footprint;
        if (tuples_per_page < 10)
                tuples_per_page = 10;

        /* Round up: a trailing partial bucket still needs a whole page. */
        return tuples / tuples_per_page + 1;
}

/*
 * h_set_bkt_mask() -- install the bucket mask used while sorting.
 *
 * The sort order of the index tuples is defined by the low-order bits of
 * the 32-bit hash of each key.  The mask is one less than the power of two
 * at or above the bucket estimate; e.g. an estimate of 4,555 buckets gives
 * a mask of 8,191 (hex 1FFF).
 */
void h_set_bkt_mask(HSpool *spool, uint32 bkts)
{
        uint32 mask = (1 << _hash_log2(bkts)) - 1;

        tuplesort_set_bktmask(spool->sortstate, mask);
}

/* Finish the sort: merge any spilled runs so tuples can be read back in order. */
void h_do_sort(HSpool *spool)
{
        tuplesort_performsort(spool->sortstate);
}

/* Release the tuplesort state and then the spool structure itself. */
void h_spooldestroy(HSpool *hspool)
{
        tuplesort_end(hspool->sortstate);
        pfree(hspool);
}


/* 
 * h_print_spool() reads the itups back from the spool, which are now
 * in sorted order by hash value. As each itup is read from the spool, it is
 * inserted into the hash index.
 *
 * Tuples whose first key attribute is NULL are skipped, matching the
 * behavior of ordinary hash-index insertion.
 */
void h_print_spool(HSpool *spool)
{
        bool isfirstnull;
        bool should_free;
        IndexTuple itup = NULL;
        TupleDesc tupd = RelationGetDescr(spool->index);

        while ((itup = tuplesort_getindextuple(spool->sortstate, true,
                                               &should_free)) != NULL)
        {
                /* We only need the null flag; the attribute value is unused. */
                (void) index_getattr(itup, 1, tupd, &isfirstnull);
                if (!isfirstnull)
                        _hash_doinsert(spool->index, itup);

                /*
                 * tuplesort may hand back a copy it expects us to release;
                 * ignoring should_free here would leak one tuple per row
                 * (cf. _bt_load in the btree build code).
                 */
                if (should_free)
                        pfree(itup);
        }
}

#include <stdio.h>
#include <stdlib.h>


/*
 * Simple test-data generator: writes <tuples> pseudo-random integers to
 * stdout, one per line, using <seed> to seed the PRNG so runs are
 * reproducible.
 *
 * Usage: prog <tuples> <seed>
 */
int main(int argc, char **argv)
{
        char *end;
        long i, tups, seed;

        if (argc < 3)
        {
                /* Diagnostics belong on stderr; signal failure to the shell. */
                fprintf(stderr, "Too few args.  <tuples> <seed>\n");
                return EXIT_FAILURE;
        }

        tups = strtol(argv[1], &end, 0);
        seed = strtol(argv[2], &end, 0);
        srand((unsigned int) seed);

        for (i = 0; i < tups; i++)
                printf("%d\n", rand());

        return EXIT_SUCCESS;
}
*** ../../../src/backend/access/hash/hash.c.orig        2007-09-23 
19:01:09.000000000 -0700
--- ../../../src/backend/access/hash/hash.c     2007-09-24 18:01:27.709487000 
-0700
***************
*** 27,35 ****
  /* Working state for hashbuild and its callback */
  typedef struct
  {
!       double          indtuples;
  } HashBuildState;
  
  static void hashbuildCallback(Relation index,
                                  HeapTuple htup,
                                  Datum *values,
--- 27,45 ----
  /* Working state for hashbuild and its callback */
  typedef struct
  {
!       double          indtuples; /* The current number of index tuples */
!       Relation        heapRel;   /* The index covers this heap relation */
!       HSpool          *spool;    /* Used to sort the index tuples before 
insertion into the index */
  } HashBuildState;
  
+ 
+ static void countTupleCallBack(Relation indx,
+                                 HeapTuple htup,
+                                 Datum *values,
+                                 bool *isnull,
+                                 bool tupleIsAlive,
+                                 void *state);
+ 
  static void hashbuildCallback(Relation index,
                                  HeapTuple htup,
                                  Datum *values,
***************
*** 40,46 ****
--- 50,87 ----
  
  /*
   *    hashbuild() -- build a new hash index.
+  *
+  *
+  *   The algorithm:
+  *    (1) Initialize the build state
+  *    (2) Scan the heap file to determine the number of rows
+  *    (3) Transform the heap file tuples into index tuples (itups),
+  *        while inserting them into a spool.  If the spool overflows
+  *        memory, sort it into runs and spill it to disk
+  *    (4) Finish sorting the spool
+  *    (5) Pre-initialize all the buckets of the final index
+  *    (6) Insert itups from the spool into the index
+  *
+  *   Sorting the tuples before inserting them into the index is a classical
+  * bulk-load technique, also used in the BTree code.  The sort is done in
+  * hash value order.
+  *   Pre-allocating the buckets minimizes the number of overflow pages.
+  *   The reason for step (2) is that in order to sort, in step (3), one must
+  * know the hash value, which depends on the number of buckets, which in turn
+  * depends on the number of itups = the number of rows in the heap file.
+  *   Steps (3),(4) and (6) parallel similar steps in the BTree code.
+  *
+  *   Here is an alternative algorithm:
+  *    (1') Same as (1)
+  *    (2') Scan the heap file, counting the number of rows, forming index
+  *         tuples and inserting them into a spool (the spool is not 
presorted).
+  *    (3') Sort the spool
+  *    (4') same as (5)
+  *    (5') same as (6)
+  *    Although this algorithm would be somewhat faster, we prefer the existing
+  * algorithm because it reuses existing BTree code.
   */
+ 
  Datum
  hashbuild(PG_FUNCTION_ARGS)
  {
***************
*** 50,55 ****
--- 91,100 ----
        IndexBuildResult *result;
        double          reltuples;
        HashBuildState buildstate;
+       uint32          tuples;
+       HashMetaPage    metap;
+       Buffer          metabuf;
+       uint32          num_bkt; /* Estimates number of buckets in the final 
index */
  
        /*
         * We expect to be called exactly once for any index relation. If that's
***************
*** 59,85 ****
                elog(ERROR, "index \"%s\" already contains data",
                         RelationGetRelationName(index));
  
!       /* initialize the hash index metadata page */
!       _hash_metapinit(index);
! 
!       /* build the index */
        buildstate.indtuples = 0;
  
!       /* do the heap scan */
!       reltuples = IndexBuildHeapScan(heap, index, indexInfo,
!                                                                  
hashbuildCallback, (void *) &buildstate);
  
-       /*
-        * Return statistics
-        */
-       result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
  
!       result->heap_tuples = reltuples;
!       result->index_tuples = buildstate.indtuples;
  
        PG_RETURN_POINTER(result);
  }
  
  /*
   * Per-tuple callback from IndexBuildHeapScan
   */
--- 104,183 ----
                elog(ERROR, "index \"%s\" already contains data",
                         RelationGetRelationName(index));
  
!       /* initialize the build state */
        buildstate.indtuples = 0;
+       buildstate.heapRel = heap;
+       buildstate.spool = h_spoolinit(index);
  
!        /*
!       * Scan the heap file to determine the number of rows
!       *
!         */
!         tuples=0;
!         IndexBuildHeapScan(heap, index, indexInfo, countTupleCallBack, 
(void*) &tuples);
! 
!         num_bkt = h_bkt_num(tuples, index); /* calculate the number of 
buckets in the final index */
!         h_set_bkt_mask(buildstate.spool, num_bkt); /* set the bucket mask for 
the compare function */
! 
!         /*
!        * Pre-initialize all the buckets of the final index
!          */
!         _hash_metapinit(index, num_bkt);
! 
!         /*
!          * Transform the heap file tuples into index tuples (itups),
!          * while inserting them into a spool.  If the spool overflows
!          * memory, sort it into runs and spill it to disk
!          *
!          */
!         reltuples = IndexBuildHeapScan(heap, index, indexInfo,
!                                                                    
hashbuildCallback, (void *) &buildstate);
! 
! 
!         /*
!          * Finish sorting the spool
!          */
!         h_do_sort(buildstate.spool);
! 
!         /*
!          * Insert itups from the spool into the index
!          */
!         h_print_spool(buildstate.spool);
! 
! 
!         /* Read the meta page just initialized */
!         metabuf = _hash_getbuf(index, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
!         _hash_checkpage(index, metabuf, LH_META_PAGE);
!         metap   = (HashMetaPage) BufferGetPage(metabuf);
! 
!         /* Gather result, destroy spool, return */
! 
!         result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
!         _hash_relbuf(index,metabuf);
!         result->heap_tuples = reltuples;
!         result->index_tuples = buildstate.indtuples;
  
  
!         h_spooldestroy(buildstate.spool);
  
        PG_RETURN_POINTER(result);
  }
  
+ 
+ /* This function is used to count the tuples in the relation we are indexing.
+  */
+ 
+ static void
+ countTupleCallBack(Relation index, HeapTuple htup, Datum *values,
+                                 bool *isnull,
+                                 bool tupleIsAlive,
+                                 void *ptr)
+ {
+         uint32 *tuples = (uint32 *) ptr;
+         *tuples += 1;
+ }
+ 
+ 
  /*
   * Per-tuple callback from IndexBuildHeapScan
   */
***************
*** 104,111 ****
                pfree(itup);
                return;
        }
! 
!       _hash_doinsert(index, itup);
  
        buildstate->indtuples += 1;
  
--- 202,212 ----
                pfree(itup);
                return;
        }
!         else
!         {
!                 /* Place each itup into the spool for sorting */
!                 h_spool(itup, buildstate->spool);
!         }
  
        buildstate->indtuples += 1;
  
*** ../../../src/backend/access/hash/hashpage.c.orig    2007-09-23 
19:31:22.000000000 -0700
--- ../../../src/backend/access/hash/hashpage.c 2007-09-23 20:46:09.000000000 
-0700
***************
*** 313,326 ****
  /*
   *    _hash_metapinit() -- Initialize the metadata page of a hash index,
   *                            the two buckets that we begin with and the 
initial
!  *                            bitmap page.
   *
   * We are fairly cavalier about locking here, since we know that no one else
   * could be accessing this index.  In particular the rule about not holding
   * multiple buffer locks is ignored.
   */
  void
! _hash_metapinit(Relation rel)
  {
        HashMetaPage metap;
        HashPageOpaque pageopaque;
--- 313,326 ----
  /*
   *    _hash_metapinit() -- Initialize the metadata page of a hash index,
   *                            the two buckets that we begin with and the 
initial
!  *                            bitmap page, plus num_bkt buckets.
   *
   * We are fairly cavalier about locking here, since we know that no one else
   * could be accessing this index.  In particular the rule about not holding
   * multiple buffer locks is ignored.
   */
  void
! _hash_metapinit(Relation rel, uint32 num_bkt)
  {
        HashMetaPage metap;
        HashPageOpaque pageopaque;
***************
*** 330,342 ****
        int32           data_width;
        int32           item_width;
        int32           ffactor;
!       uint16          i;
  
        /* safety check */
        if (RelationGetNumberOfBlocks(rel) != 0)
                elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
                         RelationGetRelationName(rel));
  
        /*
         * Determine the target fill factor (in tuples per bucket) for this 
index.
         * The idea is to make the fill factor correspond to pages about as full
--- 330,354 ----
        int32           data_width;
        int32           item_width;
        int32           ffactor;
!       uint32          i;
!       uint32          lg2buckets;
!       uint32          pwr2;
!       BlockNumber     start_blk;      
  
        /* safety check */
        if (RelationGetNumberOfBlocks(rel) != 0)
                elog(ERROR, "cannot initialize non-empty hash index \"%s\"",
                         RelationGetRelationName(rel));
  
+ 
+         /* The minimum number of buckets is 2, so if we are below
+          * that number, let's fix that here
+          */
+ 
+         if (num_bkt < 2)
+                 num_bkt = 2;
+ 
+ 
        /*
         * Determine the target fill factor (in tuples per bucket) for this 
index.
         * The idea is to make the fill factor correspond to pages about as full
***************
*** 401,431 ****
         * We initialize the index with two buckets, 0 and 1, occupying physical
         * blocks 1 and 2.      The first freespace bitmap page is in block 3.
         */
!       metap->hashm_maxbucket = metap->hashm_lowmask = 1;      /* nbuckets - 1 
*/
!       metap->hashm_highmask = 3;      /* (nbuckets << 1) - 1 */
  
        MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
        MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
  
        metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
!       metap->hashm_ovflpoint = 1;
        metap->hashm_firstfree = 0;
  
!       /*
!        * Initialize the first two buckets
!        */
!       for (i = 0; i <= 1; i++)
!       {
!               buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
!               pg = BufferGetPage(buf);
!               pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
!               pageopaque->hasho_prevblkno = InvalidBlockNumber;
!               pageopaque->hasho_nextblkno = InvalidBlockNumber;
!               pageopaque->hasho_bucket = i;
!               pageopaque->hasho_flag = LH_BUCKET_PAGE;
                pageopaque->hasho_page_id = HASHO_PAGE_ID;
!               _hash_wrtbuf(rel, buf);
!       }
  
        /*
         * Initialize first bitmap page
--- 413,488 ----
         * We initialize the index with two buckets, 0 and 1, occupying physical
         * blocks 1 and 2.      The first freespace bitmap page is in block 3.
         */
! 
!         /* We need this calculation to correctly set the splits
!          * and to set the value of the low/high mask.
!          */
!         lg2buckets = _hash_log2_floor(num_bkt);
! 
!         pwr2 = 1 << lg2buckets;
! 
! 
!         metap->hashm_maxbucket = num_bkt -1;
!         /* We want the highmask to mask the next larger
!          * power of 2 value greater than the number of buckets
!          * we need.  So, if we need 9 buckets, our high mask
!          * value would be 15.  Our low mask value will be 7
!          */
! 
!         metap->hashm_highmask = (pwr2 << 1) -1;
!         metap->hashm_lowmask = pwr2 - 1;
! 
  
        MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
        MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
  
        metap->hashm_spares[1] = 1; /* the first bitmap page is only spare */
! 
! 
!         /*
!          * No overflows will have occurred during this initialization process
!          * so we need to just copy the value '1' into each position in the
!          * hashm_spares array.  This is needed to correctly determine how each
!          * bucket maps to the logical page on disk.
!          *
!          */
! 
!         for (i = 2; i <= _hash_log2(num_bkt); i++)
!                 metap->hashm_spares[i] = 1;
! 
! 
!         /* This value needs to be the ceiling log to properly set the overflow
!          * beyond our max bucket */
!         metap->hashm_ovflpoint = _hash_log2(num_bkt);
! 
        metap->hashm_firstfree = 0;
  
! 
!         /* We need to make sure the file system can handle
!          * this big block of pages we are going to allocate
!          * _hash_alloc_buckets() will do that for us
!          */
! 
!         start_blk = BUCKET_TO_BLKNO(metap, 2);
!         _hash_alloc_buckets(rel, start_blk, (pwr2<<1)-2);
! 
!         /*
!          * Initialize the first 'num_bkt' buckets
!          */
!         for (i = 0; i < num_bkt; i++)
!         {
! 
!                 buf = _hash_getnewbuf(rel, BUCKET_TO_BLKNO(metap, i));
!                 pg = BufferGetPage(buf);
!                 _hash_pageinit(pg, BufferGetPageSize(buf));
!                 pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
!                 pageopaque->hasho_prevblkno = InvalidBlockNumber;
!                 pageopaque->hasho_nextblkno = InvalidBlockNumber;
!                 pageopaque->hasho_bucket = i;
!                 pageopaque->hasho_flag = LH_BUCKET_PAGE;
                pageopaque->hasho_page_id = HASHO_PAGE_ID;
!                 _hash_wrtbuf(rel, buf);
!         }
  
        /*
         * Initialize first bitmap page
*** ../../../src/backend/access/hash/hashutil.c.orig    2007-09-23 
19:47:27.000000000 -0700
--- ../../../src/backend/access/hash/hashutil.c 2007-09-23 19:49:44.000000000 
-0700
***************
*** 120,125 ****
--- 120,144 ----
        return bucket;
  }
  
+ 
+ /* _hash_log2_floor -- returns floor(lg2(num))
+  *
+  */
+ uint32
+ _hash_log2_floor(uint32 num)
+ {
+         uint32          i, limit;
+         limit = 1;
+         for (i=0; limit < num; limit <<= 1, i++)
+                 ;
+         if (limit == num)
+                 return i;
+         else
+                 return i-1;
+ 
+ }
+ 
+ 
  /*
   * _hash_log2 -- returns ceil(lg2(num))
   */
*** ../../../src/backend/access/hash/Makefile.orig      2007-09-23 
20:30:39.000000000 -0700
--- ../../../src/backend/access/hash/Makefile   2007-09-23 20:31:12.000000000 
-0700
***************
*** 13,19 ****
  include $(top_builddir)/src/Makefile.global
  
  OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
!        hashsearch.o hashutil.o
  
  all: SUBSYS.o
  
--- 13,19 ----
  include $(top_builddir)/src/Makefile.global
  
  OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashscan.o \
!        hashsearch.o hashutil.o hashsort.o
  
  all: SUBSYS.o
  
*** ../../../src/backend/access/hash/README.orig        2007-09-24 
00:07:32.000000000 -0700
--- ../../../src/backend/access/hash/README     2007-09-24 19:18:10.312700000 
-0700
***************
*** 105,110 ****
--- 105,135 ----
  number 3, which is the first bitmap page and is allocated during index
  creation.
  
+ Building an index on a full table
+ ----------------------------------
+ 
+ An index can be created on a table already loaded with tuples. Before it
+ is built, an estimate of the number of bucket pages that might be needed to
+ fit all the index tuples, is calculated. This is done by scanning the base
+ relation and counting the number of tuples to be indexed. A Fill factor
+ (either DEFAULT or user-defined as applicable) is then used to get the 
estimate,
+ which is the number of bucket pages initialized. If the estimate falls below 
two,
+ then a minimum of two bucket pages is initialized. However, the number of 
bucket
+ pages allocated is always a power of 2 value greater than the estimate.
+ I.e., if the estimated number of buckets is 3124, then the number of buckets
+ allocated is 4096 (and the number of bucket pages initialized is 3124).
+ If the estimate is 5456, then number allocated is 8192 (and the number 
initialized
+ is 5456) and so on.
+ 
+ A spool holds all the index tuples before they are inserted into the
+ index pages. Content of the spool is sorted on the hash value order
+ so that there will be less backtracking to the bucket pages we have already
+ visited.
+ 
+ The intention of creating as many as the estimated number of pages is to
+ avoid bucket page splits at splitpoint S and hence, redistribution of tuples.
+ Since we create all the bucket pages we need and insert the tuples based on 
the
+ bucket number they belong to, there will not be any need for splitting.
  
  Lock definitions
  ----------------
*** ../../../src/backend/utils/sort/tuplesort.c.orig    2007-09-23 
19:51:30.000000000 -0700
--- ../../../src/backend/utils/sort/tuplesort.c 2007-09-24 00:03:25.000000000 
-0700
***************
*** 341,346 ****
--- 341,347 ----
        Relation        indexRel;
        ScanKey         indexScanKey;
        bool            enforceUnique;  /* complain if we find duplicate tuples 
*/
+       uint32          bkt_mask;       /* Bucket mask for hash sort */
  
        /*
         * These variables are specific to the Datum case; they are set by
***************
*** 439,444 ****
--- 440,448 ----
  static void reversedirection_heap(Tuplesortstate *state);
  static int comparetup_index(const SortTuple *a, const SortTuple *b,
                                 Tuplesortstate *state);
+ static int comparetup_hashindex(const SortTuple *a, const SortTuple *b,
+                                Tuplesortstate *state);
+ 
  static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
  static void writetup_index(Tuplesortstate *state, int tapenum,
                           SortTuple *stup);
***************
*** 2621,2626 ****
--- 2625,2673 ----
                                                                &stup->isnull1);
  }
  
+ /*
+  * Set state parameters for sorting hash index tuples
+  */
+ void tuplesort_set_hashindex(Tuplesortstate *state)
+ {
+         state->comparetup = comparetup_hashindex;
+         state->indexScanKey = NULL; /* Scan key is not needed in hash index */
+         state->enforceUnique = false;   /* no reason to enforce uniqueness 
with a hash table */
+ 
+ }
+ 
+ 
+ void tuplesort_set_bktmask(Tuplesortstate *state, uint32 mask)
+ {
+         state->bkt_mask = mask;
+ }
+ 
+ 
+ /* Special compare function called for the hash sort algorithm
+  * We are comparing hash values of the key, which then are masked to
+  * determine the proper hash table bucket.
+  */
+ 
+ 
+ static int comparetup_hashindex(const SortTuple *a, const SortTuple *b, 
Tuplesortstate *state)
+ {
+         uint32  bkta, bktb;
+ 
+         /* Allow interrupting long sorts */
+         CHECK_FOR_INTERRUPTS();
+ 
+         bkta = (_hash_datum2hashkey(state->indexRel, a->datum1)) & 
(uint32)state->bkt_mask;
+         bktb = (_hash_datum2hashkey(state->indexRel, b->datum1)) & 
(uint32)state->bkt_mask;
+ 
+         if (bkta > bktb)
+                 return 1;
+         else if (bkta < bktb)
+                 return -1;
+         else
+                 return 0;
+ 
+ }
+ 
  static void
  reversedirection_heap(Tuplesortstate *state)
  {
*** ../../../src/include/access/hash.h.orig     2007-09-23 17:50:40.000000000 
-0700
--- ../../../src/include/access/hash.h  2007-09-23 17:40:21.000000000 -0700
***************
*** 282,287 ****
--- 282,297 ----
                                                                Bucket bucket, 
BlockNumber bucket_blkno,
                                                                
BufferAccessStrategy bstrategy);
  
+ /*hashsort.c*/
+ typedef struct HSpool HSpool;
+ extern HSpool *h_spoolinit(Relation index);
+ extern void h_spool(IndexTuple itup, HSpool *hspool);
+ extern uint32 h_bkt_num(uint32 tuples, Relation rel);
+ extern void h_set_bkt_mask(HSpool *hspool, uint32 bkts);
+ extern void h_do_sort(HSpool *hspool);
+ extern void h_print_spool(HSpool *hspool);
+ extern void h_spooldestroy(HSpool *hspool);
+ 
  /* hashpage.c */
  extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
  extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int 
access);
***************
*** 298,304 ****
  extern void _hash_wrtbuf(Relation rel, Buffer buf);
  extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
                                   int to_access);
! extern void _hash_metapinit(Relation rel);
  extern void _hash_pageinit(Page page, Size size);
  extern void _hash_expandtable(Relation rel, Buffer metabuf);
  
--- 308,314 ----
  extern void _hash_wrtbuf(Relation rel, Buffer buf);
  extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
                                   int to_access);
! extern void _hash_metapinit(Relation rel, uint32 pages);
  extern void _hash_pageinit(Page page, Size size);
  extern void _hash_expandtable(Relation rel, Buffer metabuf);
  
***************
*** 320,325 ****
--- 330,336 ----
  extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
                                         uint32 highmask, uint32 lowmask);
  extern uint32 _hash_log2(uint32 num);
+ extern uint32 _hash_log2_floor(uint32 num);
  extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
  
  /* hash.c */
*** ../../../src/include/utils/tuplesort.h.orig 2007-09-23 18:52:07.000000000 
-0700
--- ../../../src/include/utils/tuplesort.h      2007-09-23 18:59:13.000000000 
-0700
***************
*** 55,60 ****
--- 55,63 ----
                                          Oid sortOperator, bool nullsFirstFlag,
                                          int workMem, bool randomAccess);
  
+ extern void tuplesort_set_hashindex(Tuplesortstate *state);
+ extern void tuplesort_set_bktmask(Tuplesortstate *state, uint32 mask);
+ 
  extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
  
  extern void tuplesort_puttupleslot(Tuplesortstate *state,
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?

               http://archives.postgresql.org

Reply via email to