On 13/09/2025 10:10 PM, Peter Geoghegan wrote:
On Wed, Sep 3, 2025 at 2:25 AM Konstantin Knizhnik<[email protected]> wrote:
Do you suggest to replace `PageGetTempPage` with using local buffers?
Or may be change `PageGetTempPage*` to accept parameter with provided
local buffer?
I think that _bt_split could easily be switched over to using 2 local
PGAlignedBlock variables, removing its use of PageGetTempPage. I don't
think that it is necessary to consider other PageGetTempPage callers.
Attached please find updated version of the patch which uses
PGAlignedBlock instead of PageGetTempPage.
diff --git a/src/backend/access/nbtree/nbtinsert.c
b/src/backend/access/nbtree/nbtinsert.c
index be60781fc98..9a62e807f81 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -1473,6 +1473,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
Page origpage;
Page leftpage,
rightpage;
+ PGAlignedBlock leftpage_buf,
+ rightpage_buf;
BlockNumber origpagenumber,
rightpagenumber;
BTPageOpaque ropaque,
@@ -1543,8 +1545,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
firstrightoff = _bt_findsplitloc(rel, origpage, newitemoff, newitemsz,
newitem, &newitemonleft);
- /* Allocate temp buffer for leftpage */
- leftpage = PageGetTempPage(origpage);
+ leftpage = leftpage_buf.data;
+ memcpy(leftpage, origpage, BLCKSZ);
_bt_pageinit(leftpage, BufferGetPageSize(buf));
lopaque = BTPageGetOpaque(leftpage);
@@ -1707,19 +1709,13 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
/*
* Acquire a new right page to split into, now that left page has a new
- * high key. From here on, it's not okay to throw an error without
- * zeroing rightpage first. This coding rule ensures that we won't
- * confuse future VACUUM operations, which might otherwise try to
re-find
- * a downlink to a leftover junk page as the page undergoes deletion.
- *
- * It would be reasonable to start the critical section just after the
new
- * rightpage buffer is acquired instead; that would allow us to avoid
- * leftover junk pages without bothering to zero rightpage. We do it
this
- * way because it avoids an unnecessary PANIC when either origpage or
its
- * existing sibling page are corrupt.
+ * high key. To prevent confision of VACUUM with orphan page, we also
+ * first fill in-mempory copy oif this page.
*/
rbuf = _bt_allocbuf(rel, heaprel);
- rightpage = BufferGetPage(rbuf);
+ rightpage = rightpage_buf.data;
+ memcpy(rightpage, BufferGetPage(rbuf), BLCKSZ);
+ memset(BufferGetPage(rbuf), 0, BLCKSZ);
rightpagenumber = BufferGetBlockNumber(rbuf);
/* rightpage was initialized by _bt_allocbuf */
ropaque = BTPageGetOpaque(rightpage);
@@ -1768,7 +1764,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
if (PageAddItem(rightpage, (Item) righthighkey, itemsz,
afterrightoff,
false, false) ==
InvalidOffsetNumber)
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add high key to the right
sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1816,7 +1811,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
if (!_bt_pgaddtup(leftpage, newitemsz, newitem,
afterleftoff,
false))
{
- memset(rightpage, 0,
BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to
the left sibling"
" while splitting block %u of
index \"%s\"",
origpagenumber,
RelationGetRelationName(rel));
@@ -1829,7 +1823,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz,
newitem, afterrightoff,
afterrightoff
== minusinfoff))
{
- memset(rightpage, 0,
BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to
the right sibling"
" while splitting block %u of
index \"%s\"",
origpagenumber,
RelationGetRelationName(rel));
@@ -1843,7 +1836,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
{
if (!_bt_pgaddtup(leftpage, itemsz, dataitem,
afterleftoff, false))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the left
sibling"
" while splitting block %u of index
\"%s\"",
origpagenumber,
RelationGetRelationName(rel));
@@ -1855,7 +1847,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, itemsz, dataitem,
afterrightoff,
afterrightoff ==
minusinfoff))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the
right sibling"
" while splitting block %u of index
\"%s\"",
origpagenumber,
RelationGetRelationName(rel));
@@ -1876,7 +1867,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz, newitem, afterrightoff,
afterrightoff == minusinfoff))
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the right
sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
@@ -1896,7 +1886,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
sopaque = BTPageGetOpaque(spage);
if (sopaque->btpo_prev != origpagenumber)
{
- memset(rightpage, 0, BufferGetPageSize(rbuf));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg_internal("right sibling's
left-link doesn't match: "
@@ -1939,9 +1928,13 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert
itup_key, Buffer buf,
* original. We need to do this before writing the WAL record, so that
* XLogInsert can WAL log an image of the page if necessary.
*/
- PageRestoreTempPage(leftpage, origpage);
+ memcpy(origpage, leftpage, BLCKSZ);
/* leftpage, lopaque must not be used below here */
+ rightpage = BufferGetPage(rbuf);
+ memcpy(rightpage, rightpage_buf.data, BLCKSZ);
+ ropaque = BTPageGetOpaque(rightpage);
+
MarkBufferDirty(buf);
MarkBufferDirty(rbuf);