30.07.2015 16:33, Alexander Korotkov пишет:
Hi!

On Thu, Jul 30, 2015 at 2:51 PM, Anastasia Lubennikova <lubennikov...@gmail.com <mailto:lubennikov...@gmail.com>> wrote:

    I have written microvacuum support for gist access method.
    Briefly microvacuum includes two steps:
    1. When search tells us that the tuple is invisible to all
    transactions it is marked LP_DEAD and page is marked as "has dead
    tuples",
    2. Then, when insert touches full page which has dead tuples it
    calls microvacuum instead of splitting page.
    You can find a kind of review here [1].

    [1]
    
http://www.google-melange.com/gsoc/proposal/public/google/gsoc2015/ivanitskiy_ilya/5629499534213120

    Patch is in attachements. Please review it.


Nice!

Some notes about this patch.

1) Could you give same test case demonstrating that microvacuum really work with patch? Finally, we should get index less growing with microvacuum.

2) Generating notices for every dead tuple would be too noisy. I suggest to replace notice with one of debug levels.

+ elog(NOTICE, "gistkillitems. Mark Item Dead offnum %hd, blkno %d", offnum, BufferGetBlockNumber(buffer));


3) Please, recheck coding style. For instance, this line needs more spaces and open brace should be on the next line.

+ if ((scan->kill_prior_tuple)&&(so->curPageData > 0)&&(so->curPageData == so->nPageData)) {

------
Alexander Korotkov
Postgres Professional: http://www.postgrespro.com <http://www.postgrespro.com/>
The Russian Postgres Company
1) Test and results are in attachments. Everything seems to work as expected. 2) I dropped these notices. It was done only for debug purposes. Updated patch is attached.
3) fixed
//

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

*** a/src/backend/access/gist/gist.c
--- b/src/backend/access/gist/gist.c
***************
*** 36,42 **** static bool gistinserttuples(GISTInsertState *state, 
GISTInsertStack *stack,
                                 bool unlockbuf, bool unlockleftchild);
  static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                                GISTSTATE *giststate, List *splitinfo, bool 
releasebuf);
! 
  
  #define ROTATEDIST(d) do { \
        SplitedPageLayout 
*tmp=(SplitedPageLayout*)palloc(sizeof(SplitedPageLayout)); \
--- 36,42 ----
                                 bool unlockbuf, bool unlockleftchild);
  static void gistfinishsplit(GISTInsertState *state, GISTInsertStack *stack,
                                GISTSTATE *giststate, List *splitinfo, bool 
releasebuf);
! static void gistvacuumpage(Relation rel, Page page, Buffer buffer);
  
  #define ROTATEDIST(d) do { \
        SplitedPageLayout 
*tmp=(SplitedPageLayout*)palloc(sizeof(SplitedPageLayout)); \
***************
*** 209,214 **** gistplacetopage(Relation rel, Size freespace, GISTSTATE 
*giststate,
--- 209,225 ----
         * because the tuple vector passed to gistSplit won't include this 
tuple.
         */
        is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+ 
+       /*
+        * If leaf page is full, try at first to delete dead tuples. And then
+        * check again.
+        */
+       if ((is_split) && GistPageIsLeaf(page) && GistPageHasGarbage(page))
+       {
+               gistvacuumpage(rel, page, buffer);
+               is_split = gistnospace(page, itup, ntup, oldoffnum, freespace);
+       }
+ 
        if (is_split)
        {
                /* no space for insertion */
***************
*** 1440,1442 **** freeGISTstate(GISTSTATE *giststate)
--- 1451,1519 ----
        /* It's sufficient to delete the scanCxt */
        MemoryContextDelete(giststate->scanCxt);
  }
+ 
+ static void
+ gistvacuumpage(Relation rel, Page page, Buffer buffer)
+ {
+       OffsetNumber deletable[MaxOffsetNumber];
+       int                     ndeletable = 0;
+       OffsetNumber offnum,
+                               minoff,
+                               maxoff;
+ 
+       /*
+        * Scan over all items to see which ones need to be deleted according to
+        * LP_DEAD flags.
+        */
+       maxoff = PageGetMaxOffsetNumber(page);
+       for (offnum = FirstOffsetNumber;
+                offnum <= maxoff;
+                offnum = OffsetNumberNext(offnum))
+       {
+               ItemId          itemId = PageGetItemId(page, offnum);
+ 
+               if (ItemIdIsDead(itemId))
+                       deletable[ndeletable++] = offnum;
+       }
+ 
+       if (ndeletable > 0)
+       {
+               START_CRIT_SECTION();
+ 
+               PageIndexMultiDelete(page, deletable, ndeletable);
+ 
+               /*
+                * Mark the page as not containing any LP_DEAD items.  This is 
not
+                * certainly true (there might be some that have recently been 
marked,
+                * but weren't included in our target-item list), but it will 
almost
+                * always be true and it doesn't seem worth an additional page 
scan to
+                * check it. Remember that F_HAS_GARBAGE is only a hint anyway.
+                */
+               GistClearPageHasGarbage(page);
+ 
+               MarkBufferDirty(buffer);
+ 
+               /* XLOG stuff */
+               if (RelationNeedsWAL(rel))
+               {
+                       XLogRecPtr      recptr;
+ 
+                       recptr = gistXLogUpdate(rel->rd_node, buffer,
+                                                                       
deletable, ndeletable,
+                                                                       NULL, 
0, InvalidBuffer);
+ 
+                       PageSetLSN(page, recptr);
+               }
+               else
+                       PageSetLSN(page, gistGetFakeLSN(rel));
+ 
+               END_CRIT_SECTION();
+       }
+ 
+       /*
+        * Note: if we didn't find any LP_DEAD items, then the page's
+        * F_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
+        * separate write to clear it, however.  We will clear it when we split
+        * the page.
+        */
+ }
*** a/src/backend/access/gist/gistget.c
--- b/src/backend/access/gist/gistget.c
***************
*** 25,30 ****
--- 25,93 ----
  #include "utils/rel.h"
  
  
+ static void
+ gistkillitems(IndexScanDesc scan)
+ {
+       GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
+       Buffer          buffer;
+       Page            page;
+       OffsetNumber minoff;
+       OffsetNumber maxoff;
+       int                     i;
+       bool            killedsomething = false;
+ 
+       Assert(so->curBlkno != InvalidBlockNumber);
+ 
+       buffer = ReadBuffer(scan->indexRelation, so->curBlkno);
+       if (!BufferIsValid(buffer))
+               return;
+ 
+       LockBuffer(buffer, GIST_SHARE);
+       gistcheckpage(scan->indexRelation, buffer);
+       page = BufferGetPage(buffer);
+ 
+       minoff = FirstOffsetNumber;
+       maxoff = PageGetMaxOffsetNumber(page);
+ 
+       maxoff = PageGetMaxOffsetNumber(page);
+       for (i = 0; i < so->numKilled; i++)
+       {
+               if (so->killedItems != NULL)
+               {
+                       OffsetNumber offnum = FirstOffsetNumber;
+ 
+                       while (offnum <= maxoff)
+                       {
+                               ItemId          iid = PageGetItemId(page, 
offnum);
+                               IndexTuple      ituple = (IndexTuple) 
PageGetItem(page, iid);
+ 
+                               if (ItemPointerEquals(&ituple->t_tid, 
&(so->killedItems[i])))
+                               {
+                                       /* found the item */
+                                       ItemIdMarkDead(iid);
+                                       killedsomething = true;
+                                       break;          /* out of inner search 
loop */
+                               }
+                               offnum = OffsetNumberNext(offnum);
+                       }
+               }
+       }
+ 
+       if (killedsomething)
+       {
+               GistMarkPageHasGarbage(page);
+               MarkBufferDirtyHint(buffer, true);
+       }
+ 
+       UnlockReleaseBuffer(buffer);
+ 
+       /*
+        * Always reset the scan state, so we don't look for same items on other
+        * pages.
+        */
+       so->numKilled = 0;
+ }
+ 
  /*
   * gistindex_keytest() -- does this index tuple satisfy the scan key(s)?
   *
***************
*** 331,337 **** gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, 
double *myDistances,
                /* Ignore tuple if it doesn't match */
                if (!match)
                        continue;
- 
                if (tbm && GistPageIsLeaf(page))
                {
                        /*
--- 394,399 ----
***************
*** 451,456 **** getNextNearest(IndexScanDesc scan)
--- 513,535 ----
  
        if (scan->xs_itup)
        {
+               /*
+                * If previously returned index tuple is not visible save it 
into
+                * so->killedItems. And at the end of the page scan mark all 
saved
+                * tuples as dead.
+                */
+               if (scan->kill_prior_tuple)
+               {
+                       if (so->killedItems == NULL)
+                       {
+                               MemoryContext oldCxt2 = 
MemoryContextSwitchTo(so->giststate->scanCxt);
+ 
+                               so->killedItems = (ItemPointerData *) 
palloc(MaxIndexTuplesPerPage * sizeof(ItemPointerData));
+                               MemoryContextSwitchTo(oldCxt2);
+                       }
+                       if ((so->numKilled < MaxIndexTuplesPerPage))
+                               so->killedItems[so->numKilled++] = 
scan->xs_ctup.t_self;
+               }
                /* free previously returned tuple */
                pfree(scan->xs_itup);
                scan->xs_itup = NULL;
***************
*** 515,520 **** getNextNearest(IndexScanDesc scan)
--- 594,605 ----
                }
                else
                {
+                       if ((so->curBlkno != InvalidBlockNumber) && 
(so->numKilled > 0))
+                               gistkillitems(scan);
+ 
+                       /* save current item BlockNumber for next 
gistkillitems() call */
+                       so->curBlkno = item->blkno;
+ 
                        /* visit an index page, extract its items into queue */
                        CHECK_FOR_INTERRUPTS();
  
***************
*** 572,578 **** gistgettuple(PG_FUNCTION_ARGS)
--- 657,675 ----
                {
                        if (so->curPageData < so->nPageData)
                        {
+                               if ((scan->kill_prior_tuple) && 
(so->curPageData > 0))
+                               {
  
+                                       if (so->killedItems == NULL)
+                                       {
+                                               MemoryContext oldCxt = 
MemoryContextSwitchTo(so->giststate->scanCxt);
+ 
+                                               so->killedItems = 
(ItemPointerData *) palloc(MaxIndexTuplesPerPage * sizeof(ItemPointerData));
+                                               MemoryContextSwitchTo(oldCxt);
+                                       }
+                                       if (so->numKilled < 
MaxIndexTuplesPerPage)
+                                               
so->killedItems[so->numKilled++] = so->pageData[so->curPageData - 1].heapPtr;
+                               }
                                /* continuing to return tuples from a leaf page 
*/
                                scan->xs_ctup.t_self = 
so->pageData[so->curPageData].heapPtr;
                                scan->xs_recheck = 
so->pageData[so->curPageData].recheck;
***************
*** 586,594 **** gistgettuple(PG_FUNCTION_ARGS)
--- 683,711 ----
                                PG_RETURN_BOOL(true);
                        }
  
+                       /*
+                        * Check the last returned tuple and add it to 
killitems if
+                        * necessary
+                        */
+                       if ((scan->kill_prior_tuple) && (so->curPageData > 0) 
&& (so->curPageData == so->nPageData))
+                       {
+ 
+                               if (so->killedItems == NULL)
+                               {
+                                       MemoryContext oldCxt = 
MemoryContextSwitchTo(so->giststate->scanCxt);
+ 
+                                       so->killedItems = (ItemPointerData *) 
palloc(MaxIndexTuplesPerPage * sizeof(ItemPointerData));
+                                       MemoryContextSwitchTo(oldCxt);
+                               }
+                               if ((so->numKilled < MaxIndexTuplesPerPage))
+                                       so->killedItems[so->numKilled++] = 
so->pageData[so->curPageData - 1].heapPtr;
+                       }
                        /* find and process the next index page */
                        do
                        {
+                               if ((so->curBlkno != InvalidBlockNumber) && 
(so->numKilled > 0))
+                                       gistkillitems(scan);
+ 
                                GISTSearchItem *item = 
getNextGISTSearchItem(so);
  
                                if (!item)
***************
*** 596,601 **** gistgettuple(PG_FUNCTION_ARGS)
--- 713,721 ----
  
                                CHECK_FOR_INTERRUPTS();
  
+                               /* save current item BlockNumber for next 
gistkillitems() call */
+                               so->curBlkno = item->blkno;
+ 
                                /*
                                 * While scanning a leaf page, ItemPointers of 
matching heap
                                 * tuples are stored in so->pageData.  If there 
are any on
*** a/src/backend/access/gist/gistscan.c
--- b/src/backend/access/gist/gistscan.c
***************
*** 93,98 **** gistbeginscan(PG_FUNCTION_ARGS)
--- 93,102 ----
                memset(scan->xs_orderbynulls, true, sizeof(bool) * 
scan->numberOfOrderBys);
        }
  
+       so->killedItems = NULL;         /* until needed */
+       so->numKilled = 0;
+       so->curBlkno = InvalidBlockNumber;
+ 
        scan->opaque = so;
  
        /*
*** a/src/include/access/gist.h
--- b/src/include/access/gist.h
***************
*** 41,48 ****
   */
  #define F_LEAF                                (1 << 0)        /* leaf page */
  #define F_DELETED                     (1 << 1)        /* the page has been 
deleted */
! #define F_TUPLES_DELETED      (1 << 2)        /* some tuples on the page are 
dead */
  #define F_FOLLOW_RIGHT                (1 << 3)        /* page to the right 
has no downlink */
  
  typedef XLogRecPtr GistNSN;
  
--- 41,51 ----
   */
  #define F_LEAF                                (1 << 0)        /* leaf page */
  #define F_DELETED                     (1 << 1)        /* the page has been 
deleted */
! #define F_TUPLES_DELETED      (1 << 2)        /* some tuples on the page were
!                                                                               
 * deleted */
  #define F_FOLLOW_RIGHT                (1 << 3)        /* page to the right 
has no downlink */
+ #define F_HAS_GARBAGE         (1 << 4)        /* some tuples on the page are 
dead,
+                                                                               
 * but not deleted yet */
  
  typedef XLogRecPtr GistNSN;
  
***************
*** 137,142 **** typedef struct GISTENTRY
--- 140,149 ----
  #define GistMarkTuplesDeleted(page) ( GistPageGetOpaque(page)->flags |= 
F_TUPLES_DELETED)
  #define GistClearTuplesDeleted(page)  ( GistPageGetOpaque(page)->flags &= 
~F_TUPLES_DELETED)
  
+ #define GistPageHasGarbage(page) ( GistPageGetOpaque(page)->flags & 
F_HAS_GARBAGE)
+ #define GistMarkPageHasGarbage(page) ( GistPageGetOpaque(page)->flags |= 
F_HAS_GARBAGE)
+ #define GistClearPageHasGarbage(page) ( GistPageGetOpaque(page)->flags &= 
~F_HAS_GARBAGE)
+ 
  #define GistFollowRight(page) ( GistPageGetOpaque(page)->flags & 
F_FOLLOW_RIGHT)
  #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= 
F_FOLLOW_RIGHT)
  #define GistClearFollowRight(page)    ( GistPageGetOpaque(page)->flags &= 
~F_FOLLOW_RIGHT)
*** a/src/include/access/gist_private.h
--- b/src/include/access/gist_private.h
***************
*** 22,27 ****
--- 22,28 ----
  #include "storage/bufmgr.h"
  #include "storage/buffile.h"
  #include "utils/hsearch.h"
+ #include "access/genam.h"
  
  /*
   * Maximum number of "halves" a page can be split into in one operation.
***************
*** 161,166 **** typedef struct GISTScanOpaqueData
--- 162,172 ----
        /* pre-allocated workspace arrays */
        double     *distances;          /* output area for gistindex_keytest */
  
+       /* info about killed items if any (killedItems is NULL if never used) */
+       ItemPointerData *killedItems;           /* heap pointers of killed 
items */
+       int                     numKilled;              /* number of currently 
stored items */
+       BlockNumber curBlkno;           /* current number of block */
+ 
        /* In a non-ordered search, returnable heap items are stored here: */
        GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
        OffsetNumber nPageData;         /* number of valid items in array */
drop table test;

CREATE TABLE test as (select box(point(x, x),point(x, x)) 
        from generate_series(1,500000) as x);

CREATE INDEX test_idx ON test USING gist (box);

SET enable_seqscan TO false;
SET enable_bitmapscan TO false;
SET enable_indexscan TO true;
SET enable_indexonlyscan TO true;

select pg_size_pretty(pg_relation_size('test_idx'));

delete from test where box && box(point(0,0), point(500000,500000));
select pg_size_pretty(pg_relation_size('test_idx'));

-- This query invokes gistkillitems()
select box from test where box && box(point(0,0), point(500000,500000));

insert into test (select box(point(x, x),point(x, x)) 
        from generate_series(1,500000) as x);

select pg_size_pretty(pg_relation_size('test_idx'));

insert into test (select box(point(x, x),point(x, x)) 
        from generate_series(1,500000) as x);

select pg_size_pretty(pg_relation_size('test_idx'));

insert into test (select box(point(x, x),point(x, x)) 
        from generate_series(1,500000) as x);

select pg_size_pretty(pg_relation_size('test_idx'));
DROP TABLE
SELECT 500000
CREATE INDEX
SET
SET
SET
SET
 pg_size_pretty 
----------------
 48 MB
(1 row)

DELETE 500000
 pg_size_pretty 
----------------
 48 MB
(1 row)

 box 
-----
(0 rows)

INSERT 0 500000
 pg_size_pretty 
----------------
 48 MB
(1 row)

INSERT 0 500000
 pg_size_pretty 
----------------
 95 MB
(1 row)

INSERT 0 500000
 pg_size_pretty 
----------------
 142 MB
(1 row)
DROP TABLE
SELECT 500000
CREATE INDEX
SET
SET
SET
SET
 pg_size_pretty 
----------------
 48 MB
(1 row)

DELETE 500000
 pg_size_pretty 
----------------
 48 MB
(1 row)

 box 
-----
(0 rows)

INSERT 0 500000
 pg_size_pretty 
----------------
 48 MB
(1 row)

INSERT 0 500000
 pg_size_pretty 
----------------
 48 MB
(1 row)

INSERT 0 500000
 pg_size_pretty 
----------------
 95 MB
(1 row)
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to