Tomas Vondra wrote:

> Unfortunately, I think we still have a problem ... I've been wondering
> if we end up producing correct indexes, so I've done a simple test.

Here's a proposed patch that should fix this problem (and does, in my
testing).  Would you please give it a try?

This patch changes two things:

1. in VACUUM or brin_summarize_new_values, we only process fully loaded
ranges, and ignore the partial range at end of table.

2. when summarization is requested on the partial range at the end of a
table, we acquire extension lock on the rel, then compute relation size
and run summarization with the lock held.  This guarantees that we don't
miss any pages.  This is bad for concurrency though, so it's only done
in that specific scenario.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From a760bfd44afeff8d1629599411ac7ce87acc7d26 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Thu, 2 Nov 2017 18:35:10 +0100
Subject: [PATCH] Fix summarization concurrent with relation extension

---
 src/backend/access/brin/brin.c | 91 ++++++++++++++++++++++++++++++++----------
 1 file changed, 70 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index b3aa6d1ced..7696c0700c 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -29,6 +29,7 @@
 #include "postmaster/autovacuum.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
+#include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/index_selfuncs.h"
 #include "utils/memutils.h"
@@ -68,6 +69,10 @@ static BrinBuildState *initialize_brin_buildstate(Relation 
idxRel,
 static void terminate_brin_buildstate(BrinBuildState *state);
 static void brinsummarize(Relation index, Relation heapRel, BlockNumber 
pageRange,
                          double *numSummarized, double *numExisting);
+static void determine_summarization_range(BlockNumber pageRange,
+                                                         BlockNumber 
heapNumBlocks,
+                                                         BlockNumber 
pagesPerRange,
+                                                         BlockNumber 
*startBlk, BlockNumber *endBlk);
 static void form_and_insert_tuple(BrinBuildState *state);
 static void union_tuples(BrinDesc *bdesc, BrinMemTuple *a,
                         BrinTuple *b);
@@ -1253,30 +1258,16 @@ brinsummarize(Relation index, Relation heapRel, 
BlockNumber pageRange,
        BlockNumber startBlk;
        BlockNumber endBlk;
 
-       /* determine range of pages to process; nothing to do for an empty 
table */
-       heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
-       if (heapNumBlocks == 0)
-               return;
-
        revmap = brinRevmapInitialize(index, &pagesPerRange, NULL);
 
-       if (pageRange == BRIN_ALL_BLOCKRANGES)
+       /* determine range of pages to process */
+       heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
+       determine_summarization_range(pageRange, heapNumBlocks, pagesPerRange,
+                                                                 &startBlk, 
&endBlk);
+       if (startBlk > heapNumBlocks)
        {
-               startBlk = 0;
-               endBlk = heapNumBlocks;
-       }
-       else
-       {
-               startBlk = (pageRange / pagesPerRange) * pagesPerRange;
-               /* Nothing to do if start point is beyond end of table */
-               if (startBlk > heapNumBlocks)
-               {
-                       brinRevmapTerminate(revmap);
-                       return;
-               }
-               endBlk = startBlk + pagesPerRange;
-               if (endBlk > heapNumBlocks)
-                       endBlk = heapNumBlocks;
+               brinRevmapTerminate(revmap);
+               return;
        }
 
        /*
@@ -1287,9 +1278,31 @@ brinsummarize(Relation index, Relation heapRel, 
BlockNumber pageRange,
        {
                BrinTuple  *tup;
                OffsetNumber off;
+               bool            ext_lock_held = false;
 
                CHECK_FOR_INTERRUPTS();
 
+               /*
+                * Whenever we're scanning a range that would go past what we 
know to
+                * be end-of-relation, we need to ensure we scan to the true 
end of the
+                * relation, or we risk missing tuples in recently added pages. 
 To do
+                * this, we hold the relation extension lock from here till 
we're done
+                * summarizing, and compute the relation size afresh now.  The 
logic in
+                * determine_summarization_range ensures that this is not done 
in the
+                * common cases of vacuum or brin_summarize_new_values(), but 
instead
+                * only when we're specifically asked to summarize the last 
range in
+                * the relation.
+                */
+               if (heapBlk + pagesPerRange > heapNumBlocks)
+               {
+                       LockRelationForExtension(heapRel, ShareLock);
+                       ext_lock_held = true;
+
+                       heapNumBlocks = RelationGetNumberOfBlocks(heapRel);
+                       determine_summarization_range(pageRange, heapNumBlocks,
+                                                                               
  pagesPerRange, &startBlk, &endBlk);
+               }
+
                tup = brinGetTupleForHeapBlock(revmap, heapBlk, &buf, &off, 
NULL,
                                                                           
BUFFER_LOCK_SHARE, NULL);
                if (tup == NULL)
@@ -1317,6 +1330,9 @@ brinsummarize(Relation index, Relation heapRel, 
BlockNumber pageRange,
                                *numExisting += 1.0;
                        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
                }
+
+               if (ext_lock_held)
+                       UnlockRelationForExtension(heapRel, ShareLock);
        }
 
        if (BufferIsValid(buf))
@@ -1332,6 +1348,39 @@ brinsummarize(Relation index, Relation heapRel, 
BlockNumber pageRange,
 }
 
 /*
+ * Determine start and end page numbers for a requested summarization run.
+ */
+static void
+determine_summarization_range(BlockNumber pageRange, BlockNumber heapNumBlocks,
+                                                         BlockNumber 
pagesPerRange,
+                                                         BlockNumber 
*startBlk, BlockNumber *endBlk)
+{
+       /*
+        * If we're requested to summarize the whole table, process only the 
page
+        * ranges that are already complete, that is, ignore the partial range 
at
+        * the end of the table.  This is safest: if the table grows 
concurrently
+        * with summarization, new pages don't matter because they're beyond 
what
+        * we're going to scan anyway.
+        *
+        * On the other hand, if we're requested to summarize a single range, do
+        * exactly that even if the range has not been fully loaded yet.  Caller
+        * must take steps to prevent the relation from growing concurrently.
+        */
+       if (pageRange == BRIN_ALL_BLOCKRANGES)
+       {
+               *startBlk = 0;
+               *endBlk = (heapNumBlocks / pagesPerRange) * pagesPerRange;
+       }
+       else
+       {
+               *startBlk = (pageRange / pagesPerRange) * pagesPerRange;
+               *endBlk = *startBlk + pagesPerRange;
+               if (*endBlk > heapNumBlocks)
+                       *endBlk = heapNumBlocks;
+       }
+}
+
+/*
  * Given a deformed tuple in the build state, convert it into the on-disk
  * format and insert it into the index, making the revmap point to it.
  */
-- 
2.11.0

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to