On 23.07.2020 20:39, Anastasia Lubennikova wrote:
One of our clients caught an error "failed to find parent tuple for
heap-only tuple at (50661,130) in table "tbl"" in PostgreSQL v12.
Steps to reproduce (REL_12_STABLE):
1) Create table with primary key, create brin index, fill table with
some initial data:
create table tbl (id int primary key, a int) with (fillfactor=50);
create index idx on tbl using brin (a) with (autosummarize=on);
insert into tbl select i, i from generate_series(0,100000) as i;
2) Run the script brin_test.sql using pgbench:
pgbench postgres -f ../review/brin_test.sql -n -T 120
The script is a bit messy because I was trying to reproduce a
problematic workload. Though I didn't manage to simplify it.
The idea is that it inserts new values into the table to produce
unindexed pages and also updates some values to trigger HOT-updates on
these pages.
3) Open psql session and run brin_summarize_new_values
select brin_summarize_new_values('idx'::regclass::oid); \watch 2
Wait a bit. And in psql you will see the ERROR.
This error is caused by the problem with root_offsets array bounds. It
occurs if a new HOT tuple was inserted after we've collected
root_offsets, and thus we don't have root_offset for tuple's offnum.
Concurrent insertions are possible, because
brin_summarize_new_values() only holds ShareUpdateExclusiveLock on the
table and no lock (only a pin) on the page.
The draft fix is in the attachments. It saves root_offsets_size and
checks that we only access valid fields.
Patch also adds some debug messages, just to ensure that problem was
caught.
TODO:
- check if heapam_index_validate_scan() has the same problem
- code cleanup
- test other PostgreSQL versions
[1]
https://www.postgresql.org/message-id/flat/CA%2BTgmoYgwjmmjK24Qxb_vWAu8_Hh7gfVFcr3%2BR7ocdLvYOWJXg%40mail.gmail.com
Here is the updated version of the fix.
The problem can be reproduced on all supported versions, so I suggest
backpatching it to all of them.
Code slightly changed in v12, so here are two patches: one for versions
9.5 to 11 and another for versions from 12 to master.
As for heapam_index_validate_scan(), I've tried to reproduce the same
error with CREATE INDEX CONCURRENTLY, but haven't found any problem with it.
--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index fc19f40a2e3..cb29a833663 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1176,6 +1176,7 @@ heapam_index_build_range_scan(Relation heapRelation,
BlockNumber previous_blkno = InvalidBlockNumber;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+ OffsetNumber root_offsets_size = 0;
/*
* sanity checks
@@ -1341,6 +1342,11 @@ heapam_index_build_range_scan(Relation heapRelation,
* buffer continuously while visiting the page, so no pruning
* operation can occur either.
*
+ * It is essential, though, to check the root_offsets_size bound
+ * before accessing the array, because concurrently inserted HOT tuples
+ * don't have a valid cached root offset and we need to build the map
+ * once again for them.
+ *
* Also, although our opinions about tuple liveness could change while
* we scan the page (due to concurrent transaction commits/aborts),
* the chain root locations won't, so this info doesn't need to be
@@ -1355,7 +1361,7 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = hscan->rs_cblock;
@@ -1643,6 +1649,22 @@ heapam_index_build_range_scan(Relation heapRelation,
rootTuple = *heapTuple;
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
+ /*
+ * As we do not hold buffer lock, concurrent insertion can happen.
+ * If so, collect the map once again to find the root offset for
+ * the new tuple.
+ */
+ if (root_offsets_size < offnum)
+ {
+ Page page = BufferGetPage(hscan->rs_cbuf);
+
+ LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
+ LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ }
+
+ Assert(root_offsets_size >= offnum);
+
if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 0efe3ce9995..5ddb123f30c 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -740,12 +740,15 @@ heap_page_prune_execute(Buffer buffer,
* Note: The information collected here is valid only as long as the caller
* holds a pin on the buffer. Once pin is released, a tuple might be pruned
* and reused by a completely unrelated tuple.
+ *
+ * The return value is the size of the root_offsets array. The caller must
+ * never access values that are out of this bound.
*/
-void
+OffsetNumber
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
- OffsetNumber offnum,
- maxoff;
+ OffsetNumber offnum, maxoff;
+ OffsetNumber last_valid_offnum = 0;
MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
@@ -778,6 +781,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
* Remember it in the mapping.
*/
root_offsets[offnum - 1] = offnum;
+ if (offnum > last_valid_offnum)
+ last_valid_offnum = offnum;
/* If it's not the start of a HOT-chain, we're done with it */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -820,6 +825,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* Remember the root line pointer for this item */
root_offsets[nextoffnum - 1] = offnum;
+ if (nextoffnum > last_valid_offnum)
+ last_valid_offnum = nextoffnum;
/* Advance to next chain member, if any */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -832,4 +839,6 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
}
+
+ return last_valid_offnum;
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index dffb57bf11a..45f8622c6cb 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -181,7 +181,7 @@ extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
/* in heap/syncscan.c */
extern void ss_report_location(Relation rel, BlockNumber location);
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 6ff92516eda..51387d015e2 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -739,12 +739,15 @@ heap_page_prune_execute(Buffer buffer,
* Note: The information collected here is valid only as long as the caller
* holds a pin on the buffer. Once pin is released, a tuple might be pruned
* and reused by a completely unrelated tuple.
+ *
+ * The return value is the size of the root_offsets array. The caller must
+ * never access values that are out of this bound.
*/
-void
+OffsetNumber
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
- OffsetNumber offnum,
- maxoff;
+ OffsetNumber offnum, maxoff;
+ OffsetNumber last_valid_offnum = 0;
MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
@@ -777,6 +780,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
* Remember it in the mapping.
*/
root_offsets[offnum - 1] = offnum;
+ if (offnum > last_valid_offnum)
+ last_valid_offnum = offnum;
/* If it's not the start of a HOT-chain, we're done with it */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -819,6 +824,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* Remember the root line pointer for this item */
root_offsets[nextoffnum - 1] = offnum;
+ if (nextoffnum > last_valid_offnum)
+ last_valid_offnum = nextoffnum;
/* Advance to next chain member, if any */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -828,4 +835,6 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
}
+
+ return last_valid_offnum;
}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index f5c12d3d1c9..66ce910ca81 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2288,6 +2288,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
TransactionId OldestXmin;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+ OffsetNumber root_offsets_size = 0;
/*
* sanity checks
@@ -2389,6 +2390,11 @@ IndexBuildHeapRangeScan(Relation heapRelation,
* buffer continuously while visiting the page, so no pruning
* operation can occur either.
*
+ * It is essential, though, to check the root_offsets_size bound
+ * before accessing the array, because concurrently inserted HOT tuples
+ * don't have a valid cached root offset and we need to build the map
+ * once again for them.
+ *
* Also, although our opinions about tuple liveness could change while
* we scan the page (due to concurrent transaction commits/aborts),
* the chain root locations won't, so this info doesn't need to be
@@ -2403,7 +2409,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
Page page = BufferGetPage(scan->rs_cbuf);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = scan->rs_cblock;
@@ -2659,6 +2665,22 @@ IndexBuildHeapRangeScan(Relation heapRelation,
rootTuple = *heapTuple;
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
+ /*
+ * As we do not hold buffer lock, concurrent insertion can happen.
+ * If so, collect the map once again to find the root offset for
+ * the new tuple.
+ */
+ if (root_offsets_size < offnum)
+ {
+ Page page = BufferGetPage(scan->rs_cbuf);
+
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
+ LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ }
+
+ Assert(root_offsets_size >= offnum);
+
if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 175509812da..bec2a31902f 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -190,7 +190,7 @@ extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
/* in heap/syncscan.c */
extern void ss_report_location(Relation rel, BlockNumber location);