Ok, I tried two optimisations:

1. By creating a special version of comparetup_index for single key
integer indexes. Create an index_get_attr with byval and len args. By
using fetch_att and specifying the values at compile time, gcc
optimises the whole call to about 12 instructions of assembly rather
than the usual mess.

2. By specifying: -Winline -finline-limit-1500 (only on tuplesort.c).
This causes inlineApplySortFunction() to be inlined, like the code
obviously expects it to be.

default build (baseline)                    235 seconds
-finline only                               217 seconds (7% better)
comparetup_index_fastbyval4 only            221 seconds (6% better)
comparetup_index_fastbyval4 and -finline    203 seconds (13.5% better)

This is indexing the integer sequence column on a 2.7 million row
table. The times are as given by gprof and so exclude system call time.

Basically, I recommend adding "-Winline -finline-limit-1500" to the
default build while we discuss other options.

comparetup_index_fastbyval4 patch attached per example.
-- 
Martijn van Oosterhout   <kleptog@svana.org>   http://svana.org/kleptog/
> Patent. n. Genius is 5% inspiration and 95% perspiration. A patent is a
> tool for doing 5% of the work and then sitting around waiting for someone
> else to do the other 95% so you can sue them.
--- pgsql-clean/src/include/access/itup.h       2005-10-02 21:30:20.327464320 
+0200
+++ pgsql-sort/src/include/access/itup.h        2005-10-02 16:04:00.000000000 
+0200
@@ -126,6 +126,34 @@
        ) \
 )
 
+#define index_get_attr(tup, attnum, tupleDesc, attbyval, attlen, isnull) \
+( \
+       AssertMacro(PointerIsValid(isnull) && (attnum) > 0), \
+       *(isnull) = false, \
+       !IndexTupleHasNulls(tup) ? \
+       ( \
+               (tupleDesc)->attrs[(attnum)-1]->attcacheoff >= 0 ? \
+               ( \
+                       fetch_att((char *) (tup) + 
IndexInfoFindDataOffset((tup)->t_info) \
+                               + (tupleDesc)->attrs[(attnum)-1]->attcacheoff, 
attbyval, attlen) \
+               ) \
+               : \
+                       nocache_index_getattr((tup), (attnum), (tupleDesc), 
(isnull)) \
+       ) \
+       : \
+       ( \
+               (att_isnull((attnum)-1, (char *)(tup) + 
sizeof(IndexTupleData))) ? \
+               ( \
+                       *(isnull) = true, \
+                       (Datum)NULL \
+               ) \
+               : \
+               ( \
+                       nocache_index_getattr((tup), (attnum), (tupleDesc), 
(isnull)) \
+               ) \
+       ) \
+)
+
 
 /* routines in indextuple.c */
 extern IndexTuple index_form_tuple(TupleDesc tupleDescriptor,
--- pgsql-clean/src/backend/utils/sort/tuplesort.c      2005-09-24 
23:23:39.000000000 +0200
+++ pgsql-sort/src/backend/utils/sort/tuplesort.c       2005-10-02 
21:29:39.349086302 +0200
@@ -375,6 +375,8 @@
                         unsigned int len);
 static int comparetup_index(Tuplesortstate *state,
                                 const void *a, const void *b);
+static int comparetup_index_fastbyval4(Tuplesortstate *state,
+                                const void *a, const void *b);
 static void *copytup_index(Tuplesortstate *state, void *tup);
 static void writetup_index(Tuplesortstate *state, int tapenum, void *tup);
 static void *readtup_index(Tuplesortstate *state, int tapenum,
@@ -498,8 +500,12 @@
                                          int workMem, bool randomAccess)
 {
        Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
+       TupleDesc       tupDes = RelationGetDescr(indexRel);
 
-       state->comparetup = comparetup_index;
+       if( tupDes->natts == 1 && tupDes->attrs[0]->attbyval == 1 && 
tupDes->attrs[0]->attlen == 4 )
+               state->comparetup = comparetup_index_fastbyval4;
+       else
+               state->comparetup = comparetup_index;
        state->copytup = copytup_index;
        state->writetup = writetup_index;
        state->readtup = readtup_index;
@@ -2102,6 +2108,92 @@
        return 0;
 }
 
+static int
+comparetup_index_fastbyval4(Tuplesortstate *state, const void *a, const void 
*b)
+{
+       /*
+        * This is almost the same as _bt_tuplecompare(), but we need to keep
+        * track of whether any null fields are present.  Also see the special
+        * treatment for equal keys at the end.
+        */
+       IndexTuple      tuple1 = (IndexTuple) a;
+       IndexTuple      tuple2 = (IndexTuple) b;
+       Relation        rel = state->indexRel;
+       ScanKey         scankey = state->indexScanKey;
+       TupleDesc       tupDes;
+       bool            equal_hasnull = false;
+
+       tupDes = RelationGetDescr(rel);
+
+       ScanKey         entry = &scankey[0];
+       Datum           datum1,
+                                       datum2;
+       bool            isnull1,
+                                       isnull2;
+       int32           compare;
+
+       datum1 = index_get_attr(tuple1, 1, tupDes, 1, 4, &isnull1);
+       datum2 = index_get_attr(tuple2, 1, tupDes, 1, 4, &isnull2);
+
+       /* see comments about NULLs handling in btbuild */
+
+       /* the comparison function is always of CMP type */
+       compare = inlineApplySortFunction(&entry->sk_func, SORTFUNC_CMP,
+                                                                         
datum1, isnull1,
+                                                                         
datum2, isnull2);
+
+       if (compare != 0)
+               return (int) compare;           /* done when we find unequal
+                                                                               
 * attributes */
+
+       /* they are equal, so we only need to examine one null flag */
+       if (isnull1)
+               equal_hasnull = true;
+
+       /*
+        * If btree has asked us to enforce uniqueness, complain if two equal
+        * tuples are detected (unless there was at least one NULL field).
+        *
+        * It is sufficient to make the test here, because if two tuples are
+        * equal they *must* get compared at some stage of the sort ---
+        * otherwise the sort algorithm wouldn't have checked whether one must
+        * appear before the other.
+        *
+        * Some rather brain-dead implementations of qsort will sometimes call
+        * the comparison routine to compare a value to itself.  (At this
+        * writing only QNX 4 is known to do such silly things.)  Don't raise
+        * a bogus error in that case.
+        */
+       if (state->enforceUnique && !equal_hasnull && tuple1 != tuple2)
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNIQUE_VIOLATION),
+                                errmsg("could not create unique index"),
+                                errdetail("Table contains duplicated 
values.")));
+
+       /*
+        * If key values are equal, we sort on ItemPointer.  This does not
+        * affect validity of the finished index, but it offers cheap
+        * insurance against performance problems with bad qsort
+        * implementations that have trouble with large numbers of equal keys.
+        */
+       {
+               BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
+               BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
+
+               if (blk1 != blk2)
+                       return (blk1 < blk2) ? -1 : 1;
+       }
+       {
+               OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
+               OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
+
+               if (pos1 != pos2)
+                       return (pos1 < pos2) ? -1 : 1;
+       }
+
+       return 0;
+}
+
 static void *
 copytup_index(Tuplesortstate *state, void *tup)
 {

Attachment: pgprr06QKRxCw.pgp
Description: PGP signature

Reply via email to