Hi hackers!

Here is prototype of procrastinating GiST. Or lazy GiST. Or buffered GiST.
Indeed, there is nothing fractal about it.

The concept is leaf tuples stall on internal pages. From time to time they
are pushed down in batches. This code is far from perfect, I've coded this
only for the research purposes.

I have tested code with insertion of random 3D cubes. On 1 million of
insertions classic GiST is 8% faster, on 3M performance is equal, on 30M
lazy GiST if 12% faster.
I've tested it with SSD disk, may be with HDD difference will be more
significant.
Test scenario is here https://github.com/x4m/postgres_g/blob/bufdev/test.sql

In theory, this is cache friendly implementation (but not cache oblivious)
and it must be faster even for small datasets without huge disk work. But
it has there extra cost of coping batch of tuples to palloc`d array,
couln't avoid that.

This is just a proof-of-concept for performance measrures:
1. make check fails for two tests
2. WAL is broken
3. code is a mess in some places

I'm not sure 12% of performance worth it. I'll appreciate any ideas on what
to do next.

Best regards, Andrey Borodin.

2013-02-13 22:54 GMT+05:00 Simon Riggs <si...@2ndquadrant.com>:

> On 13 February 2013 16:48, Heikki Linnakangas <hlinnakan...@vmware.com>
> wrote:
> > On 13.02.2013 18:20, Tom Lane wrote:
> >>
> >> Heikki Linnakangas<hlinnakan...@vmware.com>  writes:
> >>>
> >>> The basic idea of a fractal tree index is to attach a buffer to every
> >>> non-leaf page. On insertion, instead of descending all the way down to
> >>> the correct leaf page, the new tuple is put on the buffer at the root
> >>> page. When that buffer fills up, all the tuples in the buffer are
> >>> cascaded down to the buffers on the next level pages. And recursively,
> >>> whenever a buffer fills up at any level, it's flushed to the next
> level.
> >>
> >>
> >> [ scratches head... ]  What's "fractal" about that?  Or is that just a
> >> content-free marketing name for this technique?
> >
> >
> > I'd call it out as a marketing name. I guess it's fractal in the sense
> that
> > all levels of the tree can hold "leaf tuples" in the buffers; the
> structure
> > looks the same no matter how deep you zoom, like a fractal.. But
> "Buffered"
> > would be more appropriate IMO.
>
> I hope for their sake there is more to it than that. It's hard to see
> how buffering can be patented.
>
> --
>  Simon Riggs                   http://www.2ndQuadrant.com/
>  PostgreSQL Development, 24x7 Support, Training & Services
>
>
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index b8aa9bc..29770d2 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -265,6 +265,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE 
*giststate,
                bool            is_rootsplit;
                int                     npage;
 
+
                is_rootsplit = (blkno == GIST_ROOT_BLKNO);
 
                /*
@@ -524,6 +525,8 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE 
*giststate,
                        gistfillbuffer(page, itup, ntup, InvalidOffsetNumber);
                }
 
+               GistPageGetOpaque(page)->nlazy=1; //DO NOT FORGET: remark pages 
after split
+
                MarkBufferDirty(buffer);
 
                if (BufferIsValid(leftchildbuf))
@@ -589,6 +592,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE 
*giststate,
  * this routine assumes it is invoked in a short-lived memory context,
  * so it does not bother releasing palloc'd allocations.
  */
+
 void
 gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate)
 {
@@ -597,7 +601,6 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, 
GISTSTATE *giststate)
        GISTInsertStack firststack;
        GISTInsertStack *stack;
        GISTInsertState state;
-       bool            xlocked = false;
 
        memset(&state, 0, sizeof(GISTInsertState));
        state.freespace = freespace;
@@ -610,6 +613,21 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, 
GISTSTATE *giststate)
        firststack.downlinkoffnum = InvalidOffsetNumber;
        state.stack = stack = &firststack;
 
+
+       gistdoinsertloop(r,itup,freespace, giststate, stack, state, 0);
+}
+
+void
+gistdoinsertloop(Relation r, IndexTuple itup, Size freespace, GISTSTATE 
*giststate,GISTInsertStack *givenstack, GISTInsertState state, GISTInsertStack 
*nolazy)
+{
+       ItemId          iid;
+       IndexTuple      idxtuple;
+
+       bool            xlocked = false;
+       GISTInsertStack *stack = givenstack;
+
+
+
        /*
         * Walk down along the path of smallest penalty, updating the parent
         * pointers with the key we're inserting as we go. If we crash in the
@@ -685,9 +703,86 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, 
GISTSTATE *giststate)
                        GISTInsertStack *item;
                        OffsetNumber downlinkoffnum;
 
+
+
+                       if(stack!=nolazy)
+                       {
+                               if(GistPageCanStoreLazy(stack->page, itup))
+                               {
+                                       if (!xlocked)
+                                                                       {
+                                                                               
LockBuffer(stack->buffer, GIST_UNLOCK);
+                                                                               
LockBuffer(stack->buffer, GIST_EXCLUSIVE);
+                                                                               
xlocked = true;
+                                                                               
stack->page = (Page) BufferGetPage(stack->buffer);
+
+                                                                               
if (PageGetLSN(stack->page) != stack->lsn)
+                                                                               
{
+                                                                               
        /* the page was changed while we unlocked it, retry */
+                                                                               
        continue;
+                                                                               
}
+                                                                       }
+
+                                       //elog(WARNING,"writing lazy tuple");
+
+                                       GistTupleSetLazy(itup);
+                                       gistinserttuple(&state, stack, 
giststate, itup,
+                                                                               
                InvalidOffsetNumber);
+                                       LockBuffer(stack->buffer, GIST_UNLOCK);
+                                       xlocked = false;
+
+                                       break;
+                               }
+                               else if (GistPageGetOpaque(stack->page)->nlazy)
+                               {
+                                       OffsetNumber i;
+                                       OffsetNumber maxoff = 
PageGetMaxOffsetNumber(stack->page);
+
+                                       //elog(WARNING,"starting push");
+                                       if (!xlocked)
+                                                                       {
+                                                                               
//elog(WARNING,"xloc for push");
+                                                                               
LockBuffer(stack->buffer, GIST_UNLOCK);
+                                                                               
LockBuffer(stack->buffer, GIST_EXCLUSIVE);
+                                                                               
xlocked = true;
+                                                                               
stack->page = (Page) BufferGetPage(stack->buffer);
+
+                                                                               
if (PageGetLSN(stack->page) != stack->lsn)
+                                                                               
{
+                                                                               
        /* the page was changed while we unlocked it, retry */
+                                                                               
        continue;
+                                                                               
}
+                                                                       }
+
+                                       int nlen;
+                                       IndexTuple* lazys = 
gistextractlazy(stack->page,&nlen);
+
+                                       LockBuffer(stack->buffer, GIST_UNLOCK);
+
+                                       xlocked = false;
+
+                                       for (i = 0; i < nlen; i++)
+                                               {
+                                                       IndexTuple ltup = 
lazys[i];
+                                                       //elog(WARNING,"push 
%d",i);
+                                                       
GistTupleUnsetLazy(ltup);
+                                                       
gistdoinsertloop(r,ltup,freespace,giststate,stack,state,stack);
+                                                       //elog(WARNING,"push 
out, on page %d tuples", PageGetMaxOffsetNumber(stack->page));
+                                               }
+                                       LockBuffer(stack->buffer, GIST_SHARE);
+                                       GistPageGetOpaque(stack->page)->nlazy = 
0;
+                               }
+
+                       }
+                       else
+                       {
+                               //elog(WARNING,"skipped laziness");
+                       }
+
                        downlinkoffnum = gistchoose(state.r, stack->page, itup, 
giststate);
                        iid = PageGetItemId(stack->page, downlinkoffnum);
                        idxtuple = (IndexTuple) PageGetItem(stack->page, iid);
+                       //elog(WARNING,"picked %d",downlinkoffnum);
                        childblkno = 
ItemPointerGetBlockNumber(&(idxtuple->t_tid));
 
                        /*
@@ -753,6 +848,7 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, 
GISTSTATE *giststate)
                                        continue;
                                }
                        }
+
                        LockBuffer(stack->buffer, GIST_UNLOCK);
                        xlocked = false;
 
@@ -825,12 +921,20 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, 
GISTSTATE *giststate)
                                                        InvalidOffsetNumber);
                        LockBuffer(stack->buffer, GIST_UNLOCK);
 
-                       /* Release any pins we might still hold before exiting 
*/
-                       for (; stack; stack = stack->parent)
-                               ReleaseBuffer(stack->buffer);
+
                        break;
                }
        }
+
+
+       /* Release any pins we might still hold before exiting */
+                               for (; stack; stack = stack->parent)
+                                       {
+                                               if(nolazy != stack)
+                                                       
ReleaseBuffer(stack->buffer);
+                                               if(stack == givenstack)
+                                                       break;
+                                       }
 }
 
 /*
@@ -1256,6 +1360,7 @@ gistfinishsplit(GISTInsertState *state, GISTInsertStack 
*stack,
        GISTPageSplitInfo *left;
        IndexTuple      tuples[2];
 
+
        /* A split always contains at least two halves */
        Assert(list_length(splitinfo) >= 2);
 
diff --git a/src/backend/access/gist/gistget.c 
b/src/backend/access/gist/gistget.c
index 5ba7d0a..c51c842 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -197,7 +197,7 @@ gistindex_keytest(IndexScanDesc scan,
 
                        gistdentryinit(giststate, key->sk_attno - 1, &de,
                                                   datum, r, page, offset,
-                                                  FALSE, isNull);
+                                                  FALSE, isNull, 
GistTupleIsLazy(tuple));
 
                        /*
                         * Call the Consistent function to evaluate the test.  
The
@@ -258,7 +258,7 @@ gistindex_keytest(IndexScanDesc scan,
 
                        gistdentryinit(giststate, key->sk_attno - 1, &de,
                                                   datum, r, page, offset,
-                                                  FALSE, isNull);
+                                                  FALSE, isNull, 
GistTupleIsLazy(tuple));
 
                        /*
                         * Call the Distance function to evaluate the distance. 
 The
@@ -422,7 +422,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, 
double *myDistances,
                if (!match)
                        continue;
 
-               if (tbm && GistPageIsLeaf(page))
+               if (tbm && (GistPageIsLeaf(page) || GistTupleIsLazy(it)))
                {
                        /*
                         * getbitmap scan, so just push heap tuple TIDs into 
the bitmap
@@ -431,7 +431,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, 
double *myDistances,
                        tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
                        (*ntids)++;
                }
-               else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
+               else if (scan->numberOfOrderBys == 0 && (GistPageIsLeaf(page) 
|| GistTupleIsLazy(it)))
                {
                        /*
                         * Non-ordered scan, so report tuples in so->pageData[]
@@ -466,7 +466,7 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, 
double *myDistances,
                        /* Create new GISTSearchItem for this item */
                        item = 
palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
 
-                       if (GistPageIsLeaf(page))
+                       if ((GistPageIsLeaf(page) || GistTupleIsLazy(it)))
                        {
                                /* Creating heap-tuple GISTSearchItem */
                                item->blkno = InvalidBlockNumber;
diff --git a/src/backend/access/gist/gistsplit.c 
b/src/backend/access/gist/gistsplit.c
index d394969..c9680db 100644
--- a/src/backend/access/gist/gistsplit.c
+++ b/src/backend/access/gist/gistsplit.c
@@ -643,7 +643,7 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int 
len,
                                                          &IsNull);
                gistdentryinit(giststate, attno, &(entryvec->vector[i]),
                                           datum, r, page, i,
-                                          FALSE, IsNull);
+                                          FALSE, IsNull, 
GistTupleIsLazy(itup[i-1]));
                if (IsNull)
                        offNullTuples[nOffNullTuples++] = i;
        }
diff --git a/src/backend/access/gist/gistutil.c 
b/src/backend/access/gist/gistutil.c
index 887c58b..897e7f0 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -105,6 +105,45 @@ gistextractpage(Page page, int *len /* out */ )
        return itvec;
 }
 
+
+IndexTuple *
+gistextractlazy(Page page, int *len /* out */ )
+{
+       OffsetNumber i,
+                               maxoff;
+       IndexTuple *itvec;
+       OffsetNumber* itemNos;
+       //elog(WARNING,"extracting for push");
+
+       maxoff = PageGetMaxOffsetNumber(page);
+       *len = 0;
+       itvec = palloc(sizeof(IndexTuple) * maxoff);
+       itemNos = palloc(sizeof(OffsetNumber) * maxoff);
+       for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+       {
+               ItemId tid = PageGetItemId(page, i);
+               IndexTuple it = (IndexTuple) PageGetItem(page, tid);
+               if(GistTupleIsLazy(it))
+               {
+                       //elog(WARNING,"extract item %d",i);
+                       IndexTuple copy = palloc(tid->lp_len);
+                       //elog(WARNING,"memmove",i);
+                       memmove(copy,it,tid->lp_len);
+                       //elog(WARNING,"offset %d to index %d",i,*len);
+                       itemNos[*len] = i;
+                       //elog(WARNING,"ptr",i);
+                       itvec[*len] = copy;
+                       *len = *len+1;
+                       //elog(WARNING,"done",i);
+               }
+       }
+
+       //elog(WARNING,"deleting %d",*len);
+       PageIndexMultiDelete(page,itemNos,*len);
+
+       return itvec;
+}
+
 /*
  * join two vectors into one
  */
@@ -178,7 +217,7 @@ gistMakeUnionItVec(GISTSTATE *giststate, IndexTuple *itvec, 
int len,
                                                   evec->vector + evec->n,
                                                   datum,
                                                   NULL, NULL, (OffsetNumber) 0,
-                                                  FALSE, IsNull);
+                                                  FALSE, IsNull, 
GistTupleIsLazy(itvec[j]));
                        evec->n++;
                }
 
@@ -302,7 +341,7 @@ gistDeCompressAtt(GISTSTATE *giststate, Relation r, 
IndexTuple tuple, Page p,
                datum = index_getattr(tuple, i + 1, giststate->tupdesc, 
&isnull[i]);
                gistdentryinit(giststate, i, &attdata[i],
                                           datum, r, p, o,
-                                          FALSE, isnull[i]);
+                                          FALSE, isnull[i], 
GistTupleIsLazy(tuple));
        }
 }
 
@@ -438,6 +477,9 @@ gistchoose(Relation r, Page p, IndexTuple it,       /* it 
has compressed entry */
                bool            zero_penalty;
                int                     j;
 
+               if(GistTupleIsLazy(itup))
+                       continue;
+
                zero_penalty = true;
 
                /* Loop over index attributes. */
@@ -450,7 +492,7 @@ gistchoose(Relation r, Page p, IndexTuple it,       /* it 
has compressed entry */
                        /* Compute penalty for this column. */
                        datum = index_getattr(itup, j + 1, giststate->tupdesc, 
&IsNull);
                        gistdentryinit(giststate, j, &entry, datum, r, p, i,
-                                                  FALSE, IsNull);
+                                                  FALSE, IsNull, 
GistTupleIsLazy(itup));
                        usize = gistpenalty(giststate, j, &entry, IsNull,
                                                                &identry[j], 
isnull[j]);
                        if (usize > 0)
@@ -542,7 +584,7 @@ gistchoose(Relation r, Page p, IndexTuple it,       /* it 
has compressed entry */
 void
 gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
                           Datum k, Relation r, Page pg, OffsetNumber o,
-                          bool l, bool isNull)
+                          bool l, bool isNull, bool lazy)
 {
        if (!isNull)
        {
@@ -557,9 +599,20 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY 
*e,
                if (dep != e)
                        gistentryinit(*e, dep->key, dep->rel, dep->page, 
dep->offset,
                                                  dep->leafkey);
+               if(lazy)
+               {
+                       dep->leafpage = false;
+               }
        }
        else
+       {
                gistentryinit(*e, (Datum) 0, r, pg, o, l);
+
+               if(lazy)
+               {
+                       e->leafpage = false;
+               }
+       }
 }
 
 IndexTuple
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 4343d6f..ffa9048 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -59,6 +59,7 @@ typedef struct GISTPageOpaqueData
 {
        PageGistNSN nsn;                        /* this value must change on 
page split */
        BlockNumber rightlink;          /* next page if any */
+       uint16          nlazy;
        uint16          flags;                  /* see bit definitions above */
        uint16          gist_page_id;   /* for identification of GiST indexes */
 } GISTPageOpaqueData;
@@ -125,12 +126,13 @@ typedef struct GISTENTRY
        Page            page;
        OffsetNumber offset;
        bool            leafkey;
+       bool            leafpage;
 } GISTENTRY;
 
 #define GistPageGetOpaque(page) ( (GISTPageOpaque) PageGetSpecialPointer(page) 
)
 
 #define GistPageIsLeaf(page)   ( GistPageGetOpaque(page)->flags & F_LEAF)
-#define GIST_LEAF(entry) (GistPageIsLeaf((entry)->page))
+#define GIST_LEAF(entry) (((entry)->leafpage))
 
 #define GistPageIsDeleted(page) ( GistPageGetOpaque(page)->flags & F_DELETED)
 #define GistPageSetDeleted(page)       ( GistPageGetOpaque(page)->flags |= 
F_DELETED)
@@ -168,6 +170,6 @@ typedef struct
  */
 #define gistentryinit(e, k, r, pg, o, l) \
        do { (e).key = (k); (e).rel = (r); (e).page = (pg); \
-                (e).offset = (o); (e).leafkey = (l); } while (0)
+                (e).offset = (o); (e).leafkey = (l); (e).leafpage = (pg) && 
GistPageIsLeaf(pg);} while (0)
 
 #endif   /* GIST_H */
diff --git a/src/include/access/gist_private.h 
b/src/include/access/gist_private.h
index 78e87a6..669d697 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -320,6 +320,7 @@ typedef struct
 #define  GistTupleIsInvalid(itup)      ( ItemPointerGetOffsetNumber( 
&((itup)->t_tid) ) == TUPLE_IS_INVALID )
 #define  GistTupleSetValid(itup)       ItemPointerSetOffsetNumber( 
&((itup)->t_tid), TUPLE_IS_VALID )
 
+#define GistPageCanStoreLazy(page,itup) ((int) ((PageHeader) page)->pd_upper - 
(int) ((PageHeader) page)->pd_lower - IndexTupleSize(itup) > 1024)
 
 
 
@@ -435,6 +436,13 @@ extern void gistdoinsert(Relation r,
                         IndexTuple itup,
                         Size freespace,
                         GISTSTATE *GISTstate);
+extern void gistdoinsertloop(Relation r,
+                        IndexTuple itup,
+                        Size freespace,
+                        GISTSTATE *GISTstate,
+                        GISTInsertStack *stack,
+                        GISTInsertState state,
+                        GISTInsertStack *nolazy);
 
 /* A List of these is returned from gistplacetopage() in *splitinfo */
 typedef struct
@@ -498,6 +506,7 @@ extern Buffer gistNewBuffer(Relation r);
 extern void gistfillbuffer(Page page, IndexTuple *itup, int len,
                           OffsetNumber off);
 extern IndexTuple *gistextractpage(Page page, int *len /* out */ );
+extern IndexTuple *gistextractlazy(Page page, int *len /* out */ );
 extern IndexTuple *gistjoinvector(
                           IndexTuple *itvec, int *len,
                           IndexTuple *additvec, int addlen);
@@ -519,7 +528,7 @@ extern OffsetNumber gistchoose(Relation r, Page p,
 extern void GISTInitBuffer(Buffer b, uint32 f);
 extern void gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
                           Datum k, Relation r, Page pg, OffsetNumber o,
-                          bool l, bool isNull);
+                          bool l, bool isNull, bool lazy);
 
 extern float gistpenalty(GISTSTATE *giststate, int attno,
                        GISTENTRY *key1, bool isNull1,
diff --git a/src/include/access/itup.h b/src/include/access/itup.h
index 8350fa0..d96dd94 100644
--- a/src/include/access/itup.h
+++ b/src/include/access/itup.h
@@ -72,6 +72,11 @@ typedef IndexAttributeBitMapData *IndexAttributeBitMap;
 #define IndexTupleHasNulls(itup)       ((((IndexTuple) (itup))->t_info & 
INDEX_NULL_MASK))
 #define IndexTupleHasVarwidths(itup) ((((IndexTuple) (itup))->t_info & 
INDEX_VAR_MASK))
 
+#define TUPLE_IS_LAZY          0x2000
+#define  GistTupleIsLazy(itup)         (((IndexTuple)itup)->t_info & 
TUPLE_IS_LAZY)
+#define  GistTupleSetLazy(itup)                do { ((IndexTuple)itup)->t_info 
= ((IndexTuple)itup)->t_info | TUPLE_IS_LAZY; } while(0)
+#define  GistTupleUnsetLazy(itup)      do { ((IndexTuple)itup)->t_info = 
((IndexTuple)itup)->t_info & ~TUPLE_IS_LAZY; } while(0)
+
 
 /*
  * Takes an infomask as argument (primarily because this needs to be usable
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..3767958
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+/home/x4m/project/bin/psql postgres</home/x4m/pgsql/test.sql
diff --git a/test.sql b/test.sql
new file mode 100644
index 0000000..34a89d9
--- /dev/null
+++ b/test.sql
@@ -0,0 +1,27 @@
+\timing
+SET client_min_messages = 'DEBUG5';
+SET log_min_messages = 'DEBUG5';
+SET wal_level = 'minimal';
+
+create extension if not exists cube;
+
+begin transaction;
+SELECT setseed(.43);
+
+
+create table dataTable(c cube);
+create index idx on dataTable using gist(c);
+
+insert into dataTable(c) select cube(array[random(),random(),random()]) from 
generate_series(1,1e2,1) x,generate_series(1,1e2,1) y,generate_series(1,3e3,1) 
z;
+
+
+/*create table queries(id int,l1 float,l2 float,l3 float, u1 float,u2 float, 
u3 float, q cube);
+insert into queries(id,l1,l2,l3) select 
s,random()/1.3,random()/1.3,random()/1.3 from generate_series(1,1e4,1) s;
+update queries set q = 
cube(array[l1,l2,l3],array[l1+0.01001,l2+0.10001,l3+0.10001]);
+
+select id,(select count(*) from dataTable dt where dt.c<@q) from queries order 
by id;*/
+
+rollback;
+--drop index idx;
+--drop table dataTable;
+--drop table queries;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to