On 23.07.2020 20:39, Anastasia Lubennikova wrote:
One of our clients hit the error 'failed to find parent tuple for heap-only tuple at (50661,130) in table "tbl"' in PostgreSQL v12.

Steps to reproduce (REL_12_STABLE):

1) Create a table with a primary key, create a BRIN index, and fill the table with some initial data:

create table tbl (id int primary key, a int) with (fillfactor=50);
create index idx on tbl using brin (a) with (autosummarize=on);
insert into tbl select i, i from generate_series(0,100000) as i;

2) Run the script brin_test.sql using pgbench:

pgbench postgres -f ../review/brin_test.sql -n -T 120

The script is a bit messy because I was trying to reproduce a problematic workload, and I didn't manage to simplify it further. The idea is that it inserts new values into the table to produce unindexed pages, and also updates some values to trigger HOT updates on those pages.
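Roughly, the workload is along these lines (a simplified sketch of the idea, not the exact script; the variable names and value ranges are illustrative):

\set new_id random(100001, 10000000)
\set upd_id random(0, 100000)
-- insert fresh rows past the summarized range to produce unindexed pages
insert into tbl values (:new_id, :new_id) on conflict do nothing;
-- rewrite an existing row without changing any indexed value, so the
-- update can stay HOT (fillfactor=50 leaves room on the same page)
update tbl set a = a where id = :upd_id;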

3) Open a psql session and run brin_summarize_new_values():

select brin_summarize_new_values('idx'::regclass::oid); \watch 2

Wait a bit, and psql will report the ERROR.
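After a few iterations the \watch loop fails with something like this (block and offset numbers will vary):

ERROR:  failed to find parent tuple for heap-only tuple at (50661,130) in table "tbl"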

This error is caused by a problem with the root_offsets array bounds. It occurs if a new HOT tuple was inserted after we collected root_offsets, so we have no root offset for the tuple's offnum. Concurrent insertions are possible because brin_summarize_new_values() holds only ShareUpdateExclusiveLock on the table and no lock (only a pin) on the page.
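In condensed form, the unpatched code path looks like this (simplified from heapam_index_build_range_scan() in v12, not the verbatim source):

	if (hscan->rs_cblock != root_blkno)
	{
		Page	page = BufferGetPage(hscan->rs_cbuf);

		/* build the offnum -> root offset map under buffer lock */
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
		heap_get_root_tuples(page, root_offsets);
		LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
		root_blkno = hscan->rs_cblock;
	}

	/*
	 * Buffer lock released; a concurrent backend can now insert a HOT
	 * tuple at an offnum beyond what the map covers ...
	 */

	offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
	/* ... whose entry is still zero, which raises the error above */
	if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
		ereport(ERROR, ...);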

A draft fix is attached. It saves root_offsets_size and checks that we only access valid entries. The patch also adds some debug messages, just to confirm that the problem was caught.

TODO:

- check if heapam_index_validate_scan() has the same problem
- code cleanup
- test other PostgreSQL versions


Here is an updated version of the fix.
The problem can be reproduced on all supported versions, so I suggest backpatching it. The code changed slightly in v12, so here are two patches: one for versions 9.5 through 11 and another for version 12 through master.

As for heapam_index_validate_scan(), I've tried to reproduce the same error with CREATE INDEX CONCURRENTLY, but haven't found any problem with it.
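For reference, the attempt was roughly this (a sketch; the exact timing and workload varied, and idx2 is an arbitrary name), run in a separate session while the pgbench workload from step 2 was active:

create index concurrently idx2 on tbl (a);
drop index idx2;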

--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index fc19f40a2e3..cb29a833663 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1176,6 +1176,7 @@ heapam_index_build_range_scan(Relation heapRelation,
 	BlockNumber previous_blkno = InvalidBlockNumber;
 	BlockNumber root_blkno = InvalidBlockNumber;
 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+	OffsetNumber root_offsets_size = 0;
 
 	/*
 	 * sanity checks
@@ -1341,6 +1342,11 @@ heapam_index_build_range_scan(Relation heapRelation,
 		 * buffer continuously while visiting the page, so no pruning
 		 * operation can occur either.
 		 *
+		 * It is essential, though, to check the root_offsets_size bound
+		 * before accessing the array, because concurrently inserted HOT tuples
+		 * don't have a valid cached root offset and we need to build the map
+		 * once again for them.
+		 *
 		 * Also, although our opinions about tuple liveness could change while
 		 * we scan the page (due to concurrent transaction commits/aborts),
 		 * the chain root locations won't, so this info doesn't need to be
@@ -1355,7 +1361,7 @@ heapam_index_build_range_scan(Relation heapRelation,
 			Page		page = BufferGetPage(hscan->rs_cbuf);
 
 			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-			heap_get_root_tuples(page, root_offsets);
+			root_offsets_size = heap_get_root_tuples(page, root_offsets);
 			LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
 			root_blkno = hscan->rs_cblock;
@@ -1643,6 +1649,22 @@ heapam_index_build_range_scan(Relation heapRelation,
 			rootTuple = *heapTuple;
 			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
 
+			/*
+			 * We do not hold the buffer lock here, so a concurrent insertion
+			 * may have added HOT tuples after we built the map.  If so,
+			 * rebuild the map to find the root offset for the new tuple.
+			 */
+			if (root_offsets_size < offnum)
+			{
+				Page	page = BufferGetPage(hscan->rs_cbuf);
+
+				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+				root_offsets_size = heap_get_root_tuples(page, root_offsets);
+				LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+			}
+
+			Assert(root_offsets_size >= offnum);
+
 			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
 				ereport(ERROR,
 						(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 0efe3ce9995..5ddb123f30c 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -740,12 +740,15 @@ heap_page_prune_execute(Buffer buffer,
  * Note: The information collected here is valid only as long as the caller
  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
  * and reused by a completely unrelated tuple.
+ *
+ * The return value is the highest offset number covered by the mapping. The
+ * caller must never access entries beyond this bound.
  */
-void
+OffsetNumber
 heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 {
-	OffsetNumber offnum,
-				maxoff;
+	OffsetNumber offnum, maxoff;
+	OffsetNumber last_valid_offnum = 0;
 
 	MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
 
@@ -778,6 +781,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 			 * Remember it in the mapping.
 			 */
 			root_offsets[offnum - 1] = offnum;
+			if (offnum > last_valid_offnum)
+				last_valid_offnum = offnum;
 
 			/* If it's not the start of a HOT-chain, we're done with it */
 			if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -820,6 +825,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 
 			/* Remember the root line pointer for this item */
 			root_offsets[nextoffnum - 1] = offnum;
+			if (nextoffnum > last_valid_offnum)
+				last_valid_offnum = nextoffnum;
 
 			/* Advance to next chain member, if any */
 			if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -832,4 +839,6 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 			priorXmax = HeapTupleHeaderGetUpdateXid(htup);
 		}
 	}
+
+	return last_valid_offnum;
 }
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index dffb57bf11a..45f8622c6cb 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -181,7 +181,7 @@ extern void heap_page_prune_execute(Buffer buffer,
 									OffsetNumber *redirected, int nredirected,
 									OffsetNumber *nowdead, int ndead,
 									OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 
 /* in heap/syncscan.c */
 extern void ss_report_location(Relation rel, BlockNumber location);
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 6ff92516eda..51387d015e2 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -739,12 +739,15 @@ heap_page_prune_execute(Buffer buffer,
  * Note: The information collected here is valid only as long as the caller
  * holds a pin on the buffer. Once pin is released, a tuple might be pruned
  * and reused by a completely unrelated tuple.
+ *
+ * The return value is the highest offset number covered by the mapping. The
+ * caller must never access entries beyond this bound.
  */
-void
+OffsetNumber
 heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 {
-	OffsetNumber offnum,
-				maxoff;
+	OffsetNumber offnum, maxoff;
+	OffsetNumber last_valid_offnum = 0;
 
 	MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
 
@@ -777,6 +780,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 			 * Remember it in the mapping.
 			 */
 			root_offsets[offnum - 1] = offnum;
+			if (offnum > last_valid_offnum)
+				last_valid_offnum = offnum;
 
 			/* If it's not the start of a HOT-chain, we're done with it */
 			if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -819,6 +824,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 
 			/* Remember the root line pointer for this item */
 			root_offsets[nextoffnum - 1] = offnum;
+			if (nextoffnum > last_valid_offnum)
+				last_valid_offnum = nextoffnum;
 
 			/* Advance to next chain member, if any */
 			if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -828,4 +835,6 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 			priorXmax = HeapTupleHeaderGetUpdateXid(htup);
 		}
 	}
+
+	return last_valid_offnum;
 }
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index f5c12d3d1c9..66ce910ca81 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2288,6 +2288,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 	TransactionId OldestXmin;
 	BlockNumber root_blkno = InvalidBlockNumber;
 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+	OffsetNumber root_offsets_size = 0;
 
 	/*
 	 * sanity checks
@@ -2389,6 +2390,11 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 		 * buffer continuously while visiting the page, so no pruning
 		 * operation can occur either.
 		 *
+		 * It is essential, though, to check the root_offsets_size bound
+		 * before accessing the array, because concurrently inserted HOT tuples
+		 * don't have a valid cached root offset and we need to build the map
+		 * once again for them.
+		 *
 		 * Also, although our opinions about tuple liveness could change while
 		 * we scan the page (due to concurrent transaction commits/aborts),
 		 * the chain root locations won't, so this info doesn't need to be
@@ -2403,7 +2409,7 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 			Page		page = BufferGetPage(scan->rs_cbuf);
 
 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
-			heap_get_root_tuples(page, root_offsets);
+			root_offsets_size = heap_get_root_tuples(page, root_offsets);
 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
 
 			root_blkno = scan->rs_cblock;
@@ -2659,6 +2665,22 @@ IndexBuildHeapRangeScan(Relation heapRelation,
 			rootTuple = *heapTuple;
 			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
 
+			/*
+			 * We do not hold the buffer lock here, so a concurrent insertion
+			 * may have added HOT tuples after we built the map.  If so,
+			 * rebuild the map to find the root offset for the new tuple.
+			 */
+			if (root_offsets_size < offnum)
+			{
+				Page	page = BufferGetPage(scan->rs_cbuf);
+
+				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+				root_offsets_size = heap_get_root_tuples(page, root_offsets);
+				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+			}
+
+			Assert(root_offsets_size >= offnum);
+
 			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
 				ereport(ERROR,
 						(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 175509812da..bec2a31902f 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -190,7 +190,7 @@ extern void heap_page_prune_execute(Buffer buffer,
 						OffsetNumber *redirected, int nredirected,
 						OffsetNumber *nowdead, int ndead,
 						OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
 
 /* in heap/syncscan.c */
 extern void ss_report_location(Relation rel, BlockNumber location);
