On Wed, Jan 14, 2026 at 1:31 AM Andreas Karlsson <[email protected]> wrote: > Yeah, that was a quite big thinko. I have a attached a patch with the > thinko fixed but I am still not happy with it. I think I will try to use > atomics.h and see if that makes the code nicer to read. > > Also will after that see what I can do about pageinspect.
I believe that pageinspect's heap_page_items function needs to use get_page_from_raw -- see commit 14e9b18fed. Attached patch 0002-* shows what's required. I'm including your original 0001-* patch from your v2 here, to keep CFTester happy. We (Tomas Vondra and I) are treating this as a dependency for our index prefetching patch. It'd be good to get this done soon. -- Peter Geoghegan
From a4536e8478eb4a5afc3f56f1eb4b3152e7a54696 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan <[email protected]> Date: Thu, 5 Feb 2026 01:19:01 -0500 Subject: [PATCH v3 2/2] Make pageinspect's heap_page_items use get_page_from_raw. --- contrib/pageinspect/heapfuncs.c | 13 +++---------- contrib/pageinspect/rawpage.c | 8 +++----- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 8277fa256..761d24336 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -32,6 +32,7 @@ #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pageinspect.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" @@ -132,25 +133,17 @@ heap_page_items(PG_FUNCTION_ARGS) bytea *raw_page = PG_GETARG_BYTEA_P(0); heap_page_items_state *inter_call_data = NULL; FuncCallContext *fctx; - int raw_page_size; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("must be superuser to use raw page functions"))); - raw_page_size = VARSIZE(raw_page) - VARHDRSZ; - if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext mctx; - if (raw_page_size < SizeOfPageHeaderData) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("input page too small (%d bytes)", raw_page_size))); - fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); @@ -163,7 +156,7 @@ heap_page_items(PG_FUNCTION_ARGS) inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; - inter_call_data->page = VARDATA(raw_page); + inter_call_data->page = get_page_from_raw(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; @@ -209,7 +202,7 @@ heap_page_items(PG_FUNCTION_ARGS) if (ItemIdHasStorage(id) && lp_len >= MinHeapTupleSize && lp_offset == MAXALIGN(lp_offset) && - lp_offset + lp_len <= raw_page_size) + lp_offset + lp_len <= 
BLCKSZ) { HeapTupleHeader tuphdr; diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index af54afe5b..86fe245ca 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -208,11 +208,9 @@ get_raw_page_internal(text *relname, ForkNumber forknum, BlockNumber blkno) * Get a palloc'd, maxalign'ed page image from the result of get_raw_page() * * On machines with MAXALIGN = 8, the payload of a bytea is not maxaligned, - * since it will start 4 bytes into a palloc'd value. On alignment-picky - * machines, this will cause failures in accesses to 8-byte-wide values - * within the page. We don't need to worry if accessing only 4-byte or - * smaller fields, but when examining a struct that contains 8-byte fields, - * use this function for safety. + * since it will start 4 bytes into a palloc'd value. PageHeaderData requires + * 8 byte alignment, so always use this function when accessing page header + * fields from a raw page bytea. */ Page get_page_from_raw(bytea *raw_page) -- 2.51.0
From 4b6df3e3944aea238b4bc329a9d595cda6da608d Mon Sep 17 00:00:00 2001 From: Andreas Karlsson <[email protected]> Date: Fri, 21 Nov 2025 10:15:06 +0100 Subject: [PATCH v3 1/2] Do not lock in BufferGetLSNAtomic() on archs with 8 byte atomic reads On platforms where we can read or write the whole LSN to a buffer page we do not need to lock the buffer header to be sure we do not get a torn LSN. To achieve this we need to make sure we always read and write the whole LSN and do not write it field by field. While working on this I converted PageXLogRecPtr from a struct with two uint32 fields into a uint64 to make the intent more clear. Idea by Andres Freund. --- src/include/access/gist.h | 4 +-- src/include/storage/bufpage.h | 48 +++++++++++++++++++++-------- src/backend/access/common/bufmask.c | 2 +- src/backend/storage/buffer/bufmgr.c | 12 ++++++-- 4 files changed, 47 insertions(+), 19 deletions(-) diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 8fa30126f..9b385b13a 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -186,8 +186,8 @@ typedef struct GISTENTRY #define GistMarkFollowRight(page) ( GistPageGetOpaque(page)->flags |= F_FOLLOW_RIGHT) #define GistClearFollowRight(page) ( GistPageGetOpaque(page)->flags &= ~F_FOLLOW_RIGHT) -#define GistPageGetNSN(page) ( PageXLogRecPtrGet(GistPageGetOpaque(page)->nsn)) -#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(GistPageGetOpaque(page)->nsn, val)) +#define GistPageGetNSN(page) ( PageXLogRecPtrGet(&GistPageGetOpaque(page)->nsn)) +#define GistPageSetNSN(page, val) ( PageXLogRecPtrSet(&GistPageGetOpaque(page)->nsn, val)) /* diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index ae3725b3b..2dbd9f19e 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -91,23 +91,45 @@ typedef uint16 LocationIndex; /* - * For historical reasons, the 64-bit LSN value is stored as two 32-bit - * values. 
+ * For historical reasons, the storage of 64-bit LSN values depends on the + * endianness because the field used to be stored as two 32-bit values. When + * reading and writing the LSN we need to convert between the two formats. + * + * We are careful to try to treat the LSN as a single uint64 so callers like + * BufferGetLSNAtomic() can be sure there are no torn reads or writes. */ -typedef struct -{ - uint32 xlogid; /* high bits */ - uint32 xrecoff; /* low bits */ -} PageXLogRecPtr; +typedef uint64 PageXLogRecPtr; + +#ifdef WORDS_BIGENDIAN static inline XLogRecPtr -PageXLogRecPtrGet(PageXLogRecPtr val) +PageXLogRecPtrGet(const PageXLogRecPtr *val) { - return (uint64) val.xlogid << 32 | val.xrecoff; + return *val; } -#define PageXLogRecPtrSet(ptr, lsn) \ - ((ptr).xlogid = (uint32) ((lsn) >> 32), (ptr).xrecoff = (uint32) (lsn)) +static inline void +PageXLogRecPtrSet(PageXLogRecPtr *ptr, XLogRecPtr lsn) +{ + *ptr = lsn; +} + +#else + +static inline XLogRecPtr +PageXLogRecPtrGet(const volatile PageXLogRecPtr *val) +{ + PageXLogRecPtr tmp = *val; + return (tmp << 32) | (tmp >> 32); +} + +static inline void +PageXLogRecPtrSet(volatile PageXLogRecPtr *ptr, XLogRecPtr lsn) +{ + *ptr = (lsn << 32) | (lsn >> 32); +} + +#endif /* * disk page organization @@ -385,12 +407,12 @@ PageGetMaxOffsetNumber(const PageData *page) static inline XLogRecPtr PageGetLSN(const PageData *page) { - return PageXLogRecPtrGet(((const PageHeaderData *) page)->pd_lsn); + return PageXLogRecPtrGet(&((const PageHeaderData *) page)->pd_lsn); } static inline void PageSetLSN(Page page, XLogRecPtr lsn) { - PageXLogRecPtrSet(((PageHeader) page)->pd_lsn, lsn); + PageXLogRecPtrSet(&((PageHeader) page)->pd_lsn, lsn); } static inline bool diff --git a/src/backend/access/common/bufmask.c b/src/backend/access/common/bufmask.c index 1a9e7bea5..8a67bfa1a 100644 --- a/src/backend/access/common/bufmask.c +++ b/src/backend/access/common/bufmask.c @@ -32,7 +32,7 @@ mask_page_lsn_and_checksum(Page page) { PageHeader 
phdr = (PageHeader) page; - PageXLogRecPtrSet(phdr->pd_lsn, (uint64) MASK_MARKER); + PageXLogRecPtrSet(&phdr->pd_lsn, (uint64) MASK_MARKER); phdr->pd_checksum = MASK_MARKER; } diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 7241477ca..290239484 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4626,13 +4626,18 @@ BufferIsPermanent(Buffer buffer) /* * BufferGetLSNAtomic - * Retrieves the LSN of the buffer atomically using a buffer header lock. - * This is necessary for some callers who may not have an exclusive lock - * on the buffer. + * Retrieves the LSN of the buffer atomically. + * + * On platforms without 8 byte atomic reads/writes we need to take a + * header lock. This is necessary for some callers who may not have an + * exclusive lock on the buffer. */ XLogRecPtr BufferGetLSNAtomic(Buffer buffer) { +#ifdef PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY + return PageGetLSN(BufferGetPage(buffer)); +#else char *page = BufferGetPage(buffer); BufferDesc *bufHdr; XLogRecPtr lsn; @@ -4653,6 +4658,7 @@ BufferGetLSNAtomic(Buffer buffer) UnlockBufHdr(bufHdr); return lsn; +#endif } /* --------------------------------------------------------------------- -- 2.51.0
