On Sun, Apr 7, 2024 at 9:08 AM John Naylor <johncnaylo...@gmail.com> wrote:
> I've attached a mostly-polished update on runtime embeddable values,
> storing up to 3 offsets in the child pointer (1 on 32-bit platforms).

And...since there's a new bump context patch, I wanted to anticipate
squeezing an update on top of that, if that gets committed. 0004/5 are
the v6 bump context, and 0006 uses it for vacuum. The rest are to show
it works -- the expected.out changes make possible problems in CI
easier to see. The allocation size is 16 bytes, so this difference is
entirely due to lack of chunk header:

aset: 6619136
bump: 5047296

(Note: assert builds still have the chunk header for sanity checking,
so this was done in a more optimized build)
From e2333bad47974a22120a4de3fad804a21757f631 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrow...@gmail.com>
Date: Tue, 20 Feb 2024 21:49:57 +1300
Subject: [PATCH v84 5/8] Introduce a bump memory allocator

---
 src/backend/nodes/gen_node_support.pl |   2 +-
 src/backend/utils/mmgr/Makefile       |   1 +
 src/backend/utils/mmgr/bump.c         | 818 ++++++++++++++++++++++++++
 src/backend/utils/mmgr/mcxt.c         |  15 +-
 src/backend/utils/mmgr/meson.build    |   1 +
 src/include/nodes/memnodes.h          |   3 +-
 src/include/utils/memutils.h          |   7 +
 src/include/utils/memutils_internal.h |  18 +-
 src/tools/pgindent/typedefs.list      |   2 +
 9 files changed, 862 insertions(+), 5 deletions(-)
 create mode 100644 src/backend/utils/mmgr/bump.c

diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index d4244facbb..81df3bdf95 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -149,7 +149,7 @@ my @abstract_types = qw(Node);
 # they otherwise don't participate in node support.
 my @extra_tags = qw(
   IntList OidList XidList
-  AllocSetContext GenerationContext SlabContext
+  AllocSetContext GenerationContext SlabContext BumpContext
   TIDBitmap
   WindowObjectData
 );
diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index dae3432c98..01a1fb8527 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -15,6 +15,7 @@ include $(top_builddir)/src/Makefile.global
 OBJS = \
 	alignedalloc.o \
 	aset.o \
+	bump.o \
 	dsa.o \
 	freepage.o \
 	generation.o \
diff --git a/src/backend/utils/mmgr/bump.c b/src/backend/utils/mmgr/bump.c
new file mode 100644
index 0000000000..f98a203a0c
--- /dev/null
+++ b/src/backend/utils/mmgr/bump.c
@@ -0,0 +1,818 @@
+/*-------------------------------------------------------------------------
+ *
+ * bump.c
+ *	  Bump allocator definitions.
+ *
+ * Bump is a MemoryContext implementation designed for memory usages which
+ * require allocating a large number of chunks, none of which ever need to be
+ * pfree'd or realloc'd.  Chunks allocated by this context have no chunk header
+ * and operations which ordinarily require looking at the chunk header cannot
+ * be performed.  For example, pfree, realloc, GetMemoryChunkSpace and
+ * GetMemoryChunkContext are all not possible with bump allocated chunks.  The
+ * only way to release memory allocated by this context type is to reset or
+ * delete the context.
+ *
+ * Portions Copyright (c) 2023-2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/bump.c
+ *
+ *
+ *	Bump is best suited to cases which require a large number of short-lived
+ *	chunks where performance matters.  Because bump allocated chunks don't
+ *	have a chunk header, it can fit more chunks on each block.  This means we
+ *	can do more with less memory and fewer cache lines.  The reason it's best
+ *	suited for short-lived usages of memory is that ideally, pointers to bump
+ *	allocated chunks won't be visible to a large amount of code.  The more
+ *	code that operates on memory allocated by this allocator, the more chances
+ *	that some code will try to perform a pfree or one of the other operations
+ *	which are made impossible due to the lack of chunk header.  In order
+ *	to detect accidental usage of the various disallowed operations, we do add
+ *	a MemoryChunk chunk header in MEMORY_CONTEXT_CHECKING builds and have the
+ *	various disallowed functions raise an ERROR.
+ *
+ *	Allocations are MAXALIGNed.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "lib/ilist.h"
+#include "port/pg_bitutils.h"
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "utils/memutils_memorychunk.h"
+#include "utils/memutils_internal.h"
+
+#define Bump_BLOCKHDRSZ	MAXALIGN(sizeof(BumpBlock))
+
+/* No chunk header unless built with MEMORY_CONTEXT_CHECKING */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define Bump_CHUNKHDRSZ	sizeof(MemoryChunk)
+#else
+#define Bump_CHUNKHDRSZ	0
+#endif
+
+#define Bump_CHUNK_FRACTION	8
+
+/* The keeper block is allocated in the same allocation as the set, directly after the MAXALIGN'd context header; see BumpContextCreate() */
+#define KeeperBlock(set) ((BumpBlock *) ((char *) (set) + MAXALIGN(sizeof(BumpContext))))
+#define IsKeeperBlock(set, blk) (KeeperBlock(set) == (blk))
+
+typedef struct BumpBlock BumpBlock; /* forward reference */
+
+typedef struct BumpContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+
+	/* Bump context parameters */
+	uint32		initBlockSize;	/* initial block size */
+	uint32		maxBlockSize;	/* maximum block size */
+	uint32		nextBlockSize;	/* next block size to allocate */
+	uint32		allocChunkLimit;	/* effective chunk size limit */
+
+	dlist_head	blocks;			/* list of blocks with the block currently
+								 * being filled at the head */
+} BumpContext;
+
+/*
+ * BumpBlock
+ *		BumpBlock is the unit of memory that is obtained by bump.c from
+ *		malloc().  It contains zero or more allocations, which are the
+ *		units requested by palloc().
+ */
+struct BumpBlock
+{
+	dlist_node	node;			/* doubly-linked list of blocks */
+#ifdef MEMORY_CONTEXT_CHECKING
+	BumpContext *context;		/* pointer back to the owning context */
+#endif
+	char	   *freeptr;		/* start of free space in this block */
+	char	   *endptr;			/* end of space in this block */
+};
+
+/*
+ * BumpIsValid
+ *		True iff set is valid bump context.
+ */
+#define BumpIsValid(set) \
+	(PointerIsValid(set) && IsA(set, BumpContext))
+
+/*
+ * BumpBlockIsValid
+ *		True iff block is valid block of a bump context
+ */
+#define BumpBlockIsValid(block) \
+	(PointerIsValid(block) && BumpIsValid((block)->context))
+
+/*
+ * We always store external chunks on a dedicated block.  This makes fetching
+ * the block from an external chunk easy since it's always the first and only
+ * chunk on the block.
+ */
+#define ExternalChunkGetBlock(chunk) \
+	(BumpBlock *) ((char *) chunk - Bump_BLOCKHDRSZ)
+
+/* Inlined helper functions */
+static inline void BumpBlockInit(BumpContext *context, BumpBlock *block,
+								 Size blksize);
+static inline bool BumpBlockIsEmpty(BumpBlock *block);
+static inline void BumpBlockMarkEmpty(BumpBlock *block);
+static inline Size BumpBlockFreeBytes(BumpBlock *block);
+static inline void BumpBlockFree(BumpContext *set, BumpBlock *block);
+
+
+/*
+* BumpContextCreate
+*		Create a new Bump context.
+*
+* parent: parent context, or NULL if top-level context
+* name: name of context (must be statically allocated)
+* minContextSize: minimum context size
+* initBlockSize: initial allocation block size
+* maxBlockSize: maximum allocation block size
+*/
+MemoryContext
+BumpContextCreate(MemoryContext parent,
+				  const char *name,
+				  Size minContextSize,
+				  Size initBlockSize,
+				  Size maxBlockSize)
+{
+	Size		firstBlockSize;
+	Size		allocSize;
+	BumpContext *set;
+	BumpBlock  *block;
+
+	/* ensure MemoryChunk's size is properly maxaligned */
+	StaticAssertDecl(Bump_CHUNKHDRSZ == MAXALIGN(Bump_CHUNKHDRSZ),
+					 "sizeof(MemoryChunk) is not maxaligned");
+
+	/*
+	 * First, validate allocation parameters.  Asserts seem sufficient because
+	 * nobody varies their parameters at runtime.  We somewhat arbitrarily
+	 * enforce a minimum 1K block size.  We restrict the maximum block size to
+	 * MEMORYCHUNK_MAX_BLOCKOFFSET as MemoryChunks are limited to this in
+	 * regards to addressing the offset between the chunk and the block that
+	 * the chunk is stored on.  We would be unable to store the offset between
+	 * the chunk and block for any chunks that were beyond
+	 * MEMORYCHUNK_MAX_BLOCKOFFSET bytes into the block if the block was to be
+	 * larger than this.
+	 */
+	Assert(initBlockSize == MAXALIGN(initBlockSize) &&
+		   initBlockSize >= 1024);
+	Assert(maxBlockSize == MAXALIGN(maxBlockSize) &&
+		   maxBlockSize >= initBlockSize &&
+		   AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */
+	Assert(minContextSize == 0 ||
+		   (minContextSize == MAXALIGN(minContextSize) &&
+			minContextSize >= 1024 &&
+			minContextSize <= maxBlockSize));
+	Assert(maxBlockSize <= MEMORYCHUNK_MAX_BLOCKOFFSET);
+
+	/* Determine size of initial block */
+	allocSize = MAXALIGN(sizeof(BumpContext)) + Bump_BLOCKHDRSZ +
+		Bump_CHUNKHDRSZ;
+	if (minContextSize != 0)
+		allocSize = Max(allocSize, minContextSize);
+	else
+		allocSize = Max(allocSize, initBlockSize);
+
+	/*
+	 * Allocate the initial block.  Unlike other bump.c blocks, it starts with
+	 * the context header and its block header follows that.
+	 */
+	set = (BumpContext *) malloc(allocSize);
+	if (set == NULL)
+	{
+		MemoryContextStats(TopMemoryContext);
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory"),
+				 errdetail("Failed while creating memory context \"%s\".",
+						   name)));
+	}
+
+	/*
+	 * Avoid writing code that can fail between here and MemoryContextCreate;
+	 * we'd leak the header if we ereport in this stretch.
+	 */
+	dlist_init(&set->blocks);
+
+	/* Fill in the initial block's block header */
+	block = (BumpBlock *) (((char *) set) + MAXALIGN(sizeof(BumpContext)));
+	/* determine the block size and initialize it */
+	firstBlockSize = allocSize - MAXALIGN(sizeof(BumpContext));
+	BumpBlockInit(set, block, firstBlockSize);
+
+	/* add it to the doubly-linked list of blocks */
+	dlist_push_head(&set->blocks, &block->node);
+
+	/*
+	 * Fill in BumpContext-specific header fields.  The Asserts above should
+	 * ensure that these all fit inside a uint32.
+	 */
+	set->initBlockSize = (uint32) initBlockSize;
+	set->maxBlockSize = (uint32) maxBlockSize;
+	set->nextBlockSize = (uint32) initBlockSize;
+
+	/*
+	 * Compute the allocation chunk size limit for this context.
+	 *
+	 * Limit the maximum size a non-dedicated chunk can be so that we can fit
+	 * at least Bump_CHUNK_FRACTION of chunks this big onto the maximum sized
+	 * block.  We must further limit this value so that it's no more than
+	 * MEMORYCHUNK_MAX_VALUE.  We're unable to have non-external chunks larger
+	 * than that value as we store the chunk size in the MemoryChunk 'value'
+	 * field in the call to MemoryChunkSetHdrMask().
+	 */
+	set->allocChunkLimit = Min(maxBlockSize, MEMORYCHUNK_MAX_VALUE);
+	while ((Size) (set->allocChunkLimit + Bump_CHUNKHDRSZ) >
+		   (Size) ((Size) (maxBlockSize - Bump_BLOCKHDRSZ) / Bump_CHUNK_FRACTION))
+		set->allocChunkLimit >>= 1;
+
+	/* Finally, do the type-independent part of context creation */
+	MemoryContextCreate((MemoryContext) set,
+						T_BumpContext,
+						MCTX_BUMP_ID,
+						parent,
+						name);
+
+	((MemoryContext) set)->mem_allocated = allocSize;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * BumpReset
+ *		Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context apart from the keeper
+ * block.
+ */
+void
+BumpReset(MemoryContext context)
+{
+	BumpContext *set = (BumpContext *) context;
+	dlist_mutable_iter miter;
+
+	Assert(BumpIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Check for corruption and leaks before freeing */
+	BumpCheck(context);
+#endif
+
+	dlist_foreach_modify(miter, &set->blocks)
+	{
+		BumpBlock  *block = dlist_container(BumpBlock, node, miter.cur);
+
+		if (IsKeeperBlock(set, block))
+			BumpBlockMarkEmpty(block);
+		else
+			BumpBlockFree(set, block);
+	}
+
+	/* Reset block size allocation sequence, too */
+	set->nextBlockSize = set->initBlockSize;
+
+	/* Ensure there is only 1 item in the dlist */
+	Assert(!dlist_is_empty(&set->blocks));
+	Assert(!dlist_has_next(&set->blocks, dlist_head_node(&set->blocks)));
+}
+
+/*
+ * BumpDelete
+ *		Free all memory which is allocated in the given context.
+ */
+void
+BumpDelete(MemoryContext context)
+{
+	/* Reset to release all releasable BumpBlocks */
+	BumpReset(context);
+	/* And free the context header and keeper block */
+	free(context);
+}
+
+/*
+ * Helper for BumpAlloc() that allocates an entire block for the chunk.
+ *
+ * BumpAlloc()'s comment explains why this is separate.
+ */
+pg_noinline
+static void *
+BumpAllocLarge(MemoryContext context, Size size, int flags)
+{
+	BumpContext *set = (BumpContext *) context;
+	BumpBlock  *block;
+#ifdef MEMORY_CONTEXT_CHECKING
+	MemoryChunk *chunk;
+#endif
+	Size		chunk_size;
+	Size		required_size;
+	Size		blksize;
+
+	/* validate 'size' is within the limits for the given 'flags' */
+	MemoryContextCheckSize(context, size, flags);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* ensure there's always space for the sentinel byte */
+	chunk_size = MAXALIGN(size + 1);
+#else
+	chunk_size = MAXALIGN(size);
+#endif
+
+	required_size = chunk_size + Bump_CHUNKHDRSZ;
+	blksize = required_size + Bump_BLOCKHDRSZ;
+
+	block = (BumpBlock *) malloc(blksize);
+	if (block == NULL)
+		return NULL;
+
+	context->mem_allocated += blksize;
+
+	/* the block is completely full */
+	block->freeptr = block->endptr = ((char *) block) + blksize;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* block with a single (used) chunk */
+	block->context = set;
+
+	chunk = (MemoryChunk *) (((char *) block) + Bump_BLOCKHDRSZ);
+
+	/* mark the MemoryChunk as externally managed */
+	MemoryChunkSetHdrMaskExternal(chunk, MCTX_BUMP_ID);
+
+	chunk->requested_size = size;
+	/* set mark to catch clobber of "unused" space */
+	Assert(size < chunk_size);
+	set_sentinel(MemoryChunkGetPointer(chunk), size);
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
+#endif
+#endif							/* MEMORY_CONTEXT_CHECKING */
+
+	/* add the block to the list of allocated blocks */
+	dlist_push_head(&set->blocks, &block->node);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Ensure any padding bytes are marked NOACCESS. */
+	VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size,
+							   chunk_size - size);
+
+	/* Disallow access to the chunk header. */
+	VALGRIND_MAKE_MEM_NOACCESS(chunk, Bump_CHUNKHDRSZ);
+
+	return MemoryChunkGetPointer(chunk);
+#else
+	return (void *) (((char *) block) + Bump_BLOCKHDRSZ);
+#endif
+}
+
+/*
+ * Small helper for allocating a new chunk from a block, to avoid duplicating
+ * the code between BumpAlloc() and BumpAllocFromNewBlock().
+ */
+static inline void *
+BumpAllocChunkFromBlock(MemoryContext context, BumpBlock *block, Size size,
+						Size chunk_size)
+{
+#ifdef MEMORY_CONTEXT_CHECKING
+	MemoryChunk *chunk;
+#else
+	void	   *ptr;
+#endif
+
+	/* validate we've been given a block with enough free space */
+	Assert(block != NULL);
+	Assert((block->endptr - block->freeptr) >= Bump_CHUNKHDRSZ + chunk_size);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	chunk = (MemoryChunk *) block->freeptr;
+#else
+	ptr = (void *) block->freeptr;
+#endif
+
+	/* point the freeptr beyond this chunk */
+	block->freeptr += (Bump_CHUNKHDRSZ + chunk_size);
+	Assert(block->freeptr <= block->endptr);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Prepare to initialize the chunk header. */
+	VALGRIND_MAKE_MEM_UNDEFINED(chunk, Bump_CHUNKHDRSZ);
+
+	MemoryChunkSetHdrMask(chunk, block, chunk_size, MCTX_BUMP_ID);
+	chunk->requested_size = size;
+	/* set mark to catch clobber of "unused" space */
+	Assert(size < chunk_size);
+	set_sentinel(MemoryChunkGetPointer(chunk), size);
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) MemoryChunkGetPointer(chunk), size);
+#endif
+
+	/* Ensure any padding bytes are marked NOACCESS. */
+	VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size,
+							   chunk_size - size);
+
+	/* Disallow access to the chunk header. */
+	VALGRIND_MAKE_MEM_NOACCESS(chunk, Bump_CHUNKHDRSZ);
+
+	return MemoryChunkGetPointer(chunk);
+#else
+	return ptr;
+#endif							/* MEMORY_CONTEXT_CHECKING */
+}
+
+/*
+ * Helper for BumpAlloc() that allocates a new block and returns a chunk
+ * allocated from it.
+ *
+ * BumpAlloc()'s comment explains why this is separate.
+ */
+pg_noinline
+static void *
+BumpAllocFromNewBlock(MemoryContext context, Size size, int flags,
+					  Size chunk_size)
+{
+	BumpContext *set = (BumpContext *) context;
+	BumpBlock  *block;
+	Size		blksize;
+	Size		required_size;
+
+	/*
+	 * The first such block has size initBlockSize, and we double the space in
+	 * each succeeding block, but not more than maxBlockSize.
+	 */
+	blksize = set->nextBlockSize;
+	set->nextBlockSize <<= 1;
+	if (set->nextBlockSize > set->maxBlockSize)
+		set->nextBlockSize = set->maxBlockSize;
+
+	/* we'll need space for the chunk, chunk hdr and block hdr */
+	required_size = chunk_size + Bump_CHUNKHDRSZ + Bump_BLOCKHDRSZ;
+	/* round the size up to the next power of 2 */
+	if (blksize < required_size)
+		blksize = pg_nextpower2_size_t(required_size);
+
+	block = (BumpBlock *) malloc(blksize);
+
+	if (block == NULL)
+		return MemoryContextAllocationFailure(context, size, flags);
+
+	context->mem_allocated += blksize;
+
+	/* initialize the new block */
+	BumpBlockInit(set, block, blksize);
+
+	/* add it to the doubly-linked list of blocks */
+	dlist_push_head(&set->blocks, &block->node);
+
+	return BumpAllocChunkFromBlock(context, block, size, chunk_size);
+}
+
+/*
+ * BumpAlloc
+ *		Returns a pointer to allocated memory of given size or raises an ERROR
+ *		on allocation failure, or returns NULL when flags contains
+ *		MCXT_ALLOC_NO_OOM.
+ *
+ * No request may exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - Bump_BLOCKHDRSZ - Bump_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ *
+ *
+ * Note: when using valgrind, it doesn't matter how the returned allocation
+ * is marked, as mcxt.c will set it to UNDEFINED.
+ * This function should only contain the most common code paths.  Everything
+ * else should be in pg_noinline helper functions, thus avoiding the overhead
+ * of creating a stack frame for the common cases.  Allocating memory is often
+ * a bottleneck in many workloads, so avoiding stack frame setup is
+ * worthwhile.  Helper functions should always directly return the newly
+ * allocated memory so that we can just return that address directly as a tail
+ * call.
+ */
+void *
+BumpAlloc(MemoryContext context, Size size, int flags)
+{
+	BumpContext *set = (BumpContext *) context;
+	BumpBlock  *block;
+	Size		chunk_size;
+	Size		required_size;
+
+	Assert(BumpIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* ensure there's always space for the sentinel byte */
+	chunk_size = MAXALIGN(size + 1);
+#else
+	chunk_size = MAXALIGN(size);
+#endif
+
+	/*
+	 * If requested size exceeds maximum for chunks we hand the request
+	 * off to BumpAllocLarge().
+	 */
+	if (chunk_size > set->allocChunkLimit)
+		return BumpAllocLarge(context, size, flags);
+
+	required_size = chunk_size + Bump_CHUNKHDRSZ;
+
+	/*
+	 * Not an oversized chunk.  We try to first make use of the latest block,
+	 * but if there's not enough space in it we must allocate a new block.
+	 */
+	block = dlist_container(BumpBlock, node, dlist_head_node(&set->blocks));
+
+	if (BumpBlockFreeBytes(block) < required_size)
+		return BumpAllocFromNewBlock(context, size, flags, chunk_size);
+
+	/* The current block has space, so just allocate chunk there. */
+	return BumpAllocChunkFromBlock(context, block, size, chunk_size);
+
+}
+
+/*
+ * BumpBlockInit
+ *		Initializes 'block' assuming 'blksize'.  Does not update the context's
+ *		mem_allocated field.
+ */
+static inline void
+BumpBlockInit(BumpContext *context, BumpBlock *block, Size blksize)
+{
+#ifdef MEMORY_CONTEXT_CHECKING
+	block->context = context;
+#endif
+	block->freeptr = ((char *) block) + Bump_BLOCKHDRSZ;
+	block->endptr = ((char *) block) + blksize;
+
+	/* Mark unallocated space NOACCESS. */
+	VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, blksize - Bump_BLOCKHDRSZ);
+}
+
+/*
+ * BumpBlockIsEmpty
+ *		Returns true iff 'block' contains no chunks
+ */
+static inline bool
+BumpBlockIsEmpty(BumpBlock *block)
+{
+	/* it's empty if the freeptr has not moved */
+	return (block->freeptr == ((char *) block + Bump_BLOCKHDRSZ));
+}
+
+/*
+ * BumpBlockMarkEmpty
+ *		Set a block as empty.  Does not free the block.
+ */
+static inline void
+BumpBlockMarkEmpty(BumpBlock *block)
+{
+#if defined(USE_VALGRIND) || defined(CLOBBER_FREED_MEMORY)
+	char	   *datastart = ((char *) block) + Bump_BLOCKHDRSZ;
+#endif
+
+#ifdef CLOBBER_FREED_MEMORY
+	wipe_mem(datastart, block->freeptr - datastart);
+#else
+	/* wipe_mem() would have done this */
+	VALGRIND_MAKE_MEM_NOACCESS(datastart, block->freeptr - datastart);
+#endif
+
+	/* Reset the block, but don't return it to malloc */
+	block->freeptr = ((char *) block) + Bump_BLOCKHDRSZ;
+}
+
+/*
+ * BumpBlockFreeBytes
+ *		Returns the number of bytes free in 'block'
+ */
+static inline Size
+BumpBlockFreeBytes(BumpBlock *block)
+{
+	return (block->endptr - block->freeptr);
+}
+
+/*
+ * BumpBlockFree
+ *		Remove 'block' from 'set' and release the memory consumed by it.
+ */
+static inline void
+BumpBlockFree(BumpContext *set, BumpBlock *block)
+{
+	/* Make sure nobody tries to free the keeper block */
+	Assert(!IsKeeperBlock(set, block));
+
+	/* release the block from the list of blocks */
+	dlist_delete(&block->node);
+
+	((MemoryContext) set)->mem_allocated -= ((char *) block->endptr - (char *) block);
+
+#ifdef CLOBBER_FREED_MEMORY
+	wipe_mem(block, ((char *) block->endptr - (char *) block));
+#endif
+
+	free(block);
+}
+
+/*
+ * BumpFree
+ *		Unsupported.
+ */
+void
+BumpFree(void *pointer)
+{
+	elog(ERROR, "pfree is not supported by the bump memory allocator");
+}
+
+/*
+ * BumpRealloc
+ *		Unsupported.
+ */
+void *
+BumpRealloc(void *pointer, Size size, int flags)
+{
+	elog(ERROR, "%s is not supported by the bump memory allocator", "realloc");
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+ * BumpGetChunkContext
+ *		Unsupported.
+ */
+MemoryContext
+BumpGetChunkContext(void *pointer)
+{
+	elog(ERROR, "%s is not supported by the bump memory allocator", "GetMemoryChunkContext");
+	return NULL;				/* keep compiler quiet */
+}
+
+/*
+* BumpGetChunkSpace
+*		Given a currently-allocated chunk, determine the total space
+*		it occupies (including all memory-allocation overhead).
+*/
+Size
+BumpGetChunkSpace(void *pointer)
+{
+	elog(ERROR, "%s is not supported by the bump memory allocator", "GetMemoryChunkSpace");
+	return 0;					/* keep compiler quiet */
+}
+
+/*
+ * BumpIsEmpty
+ *		Is a BumpContext empty of any allocated space?
+ */
+bool
+BumpIsEmpty(MemoryContext context)
+{
+	BumpContext *set = (BumpContext *) context;
+	dlist_iter	iter;
+
+	Assert(BumpIsValid(set));
+
+	dlist_foreach(iter, &set->blocks)
+	{
+		BumpBlock  *block = dlist_container(BumpBlock, node, iter.cur);
+
+		if (!BumpBlockIsEmpty(block))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * BumpStats
+ *		Compute stats about memory consumption of a Bump context.
+ *
+ * printfunc: if not NULL, pass a human-readable stats string to this.
+ * passthru: pass this pointer through to printfunc.
+ * totals: if not NULL, add stats about this context into *totals.
+ * print_to_stderr: print stats to stderr if true, elog otherwise.
+ */
+void
+BumpStats(MemoryContext context, MemoryStatsPrintFunc printfunc,
+		  void *passthru, MemoryContextCounters *totals, bool print_to_stderr)
+{
+	BumpContext *set = (BumpContext *) context;
+	Size		nblocks = 0;
+	Size		totalspace = 0;
+	Size		freespace = 0;
+	dlist_iter	iter;
+
+	Assert(BumpIsValid(set));
+
+	dlist_foreach(iter, &set->blocks)
+	{
+		BumpBlock  *block = dlist_container(BumpBlock, node, iter.cur);
+
+		nblocks++;
+		totalspace += (block->endptr - (char *) block);
+		freespace += (block->endptr - block->freeptr);
+	}
+
+	if (printfunc)
+	{
+		char		stats_string[200];
+
+		snprintf(stats_string, sizeof(stats_string),
+				 "%zu total in %zu blocks; %zu free; %zu used",
+				 totalspace, nblocks, freespace, totalspace - freespace);
+		printfunc(context, passthru, stats_string, print_to_stderr);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += nblocks;
+		totals->totalspace += totalspace;
+		totals->freespace += freespace;
+	}
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * BumpCheck
+ *		Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+void
+BumpCheck(MemoryContext context)
+{
+	BumpContext *bump = (BumpContext *) context;
+	const char *name = context->name;
+	dlist_iter	iter;
+	Size		total_allocated = 0;
+
+	/* walk all blocks in this context */
+	dlist_foreach(iter, &bump->blocks)
+	{
+		BumpBlock  *block = dlist_container(BumpBlock, node, iter.cur);
+		int			nchunks;
+		char	   *ptr;
+		bool		has_external_chunk = false;
+
+		if (IsKeeperBlock(bump, block))
+			total_allocated += block->endptr - (char *) bump;
+		else
+			total_allocated += block->endptr - (char *) block;
+
+		/* check block belongs to the correct context */
+		if (block->context != bump)
+			elog(WARNING, "problem in Bump %s: bogus context link in block %p",
+				 name, block);
+
+		/* now walk through the chunks and count them */
+		nchunks = 0;
+		ptr = ((char *) block) + Bump_BLOCKHDRSZ;
+
+		while (ptr < block->freeptr)
+		{
+			MemoryChunk *chunk = (MemoryChunk *) ptr;
+			BumpBlock  *chunkblock;
+			Size		chunksize;
+
+			/* allow access to the chunk header */
+			VALGRIND_MAKE_MEM_DEFINED(chunk, Bump_CHUNKHDRSZ);
+
+			if (MemoryChunkIsExternal(chunk))
+			{
+				chunkblock = ExternalChunkGetBlock(chunk);
+				chunksize = block->endptr - (char *) MemoryChunkGetPointer(chunk);
+				has_external_chunk = true;
+			}
+			else
+			{
+				chunkblock = MemoryChunkGetBlock(chunk);
+				chunksize = MemoryChunkGetValue(chunk);
+			}
+
+			/* move to the next chunk */
+			ptr += (chunksize + Bump_CHUNKHDRSZ);
+
+			nchunks += 1;
+
+			/* chunks have both block and context pointers, so check both */
+			if (chunkblock != block)
+				elog(WARNING, "problem in Bump %s: bogus block link in block %p, chunk %p",
+					 name, block, chunk);
+		}
+
+		if (has_external_chunk && nchunks > 1)
+			elog(WARNING, "problem in Bump %s: external chunk on non-dedicated block %p",
+				 name, block);
+
+	}
+
+	Assert(total_allocated == context->mem_allocated);
+}
+
+#endif							/* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 52ae120af9..80abfdc9d7 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -100,6 +100,19 @@ static const MemoryContextMethods mcxt_methods[] = {
 	[MCTX_ALIGNED_REDIRECT_ID].check = NULL,	/* not required */
 #endif
 
+	/* bump.c */
+	[MCTX_BUMP_ID].alloc = BumpAlloc,
+	[MCTX_BUMP_ID].free_p = BumpFree,
+	[MCTX_BUMP_ID].realloc = BumpRealloc,
+	[MCTX_BUMP_ID].reset = BumpReset,
+	[MCTX_BUMP_ID].delete_context = BumpDelete,
+	[MCTX_BUMP_ID].get_chunk_context = BumpGetChunkContext,
+	[MCTX_BUMP_ID].get_chunk_space = BumpGetChunkSpace,
+	[MCTX_BUMP_ID].is_empty = BumpIsEmpty,
+	[MCTX_BUMP_ID].stats = BumpStats,
+#ifdef MEMORY_CONTEXT_CHECKING
+	[MCTX_BUMP_ID].check = BumpCheck,
+#endif
 
 	/*
 	 * Unused (as yet) IDs should have dummy entries here.  This allows us to
@@ -107,8 +120,6 @@ static const MemoryContextMethods mcxt_methods[] = {
 	 * seems sufficient to provide routines for the methods that might get
 	 * invoked from inspection of a chunk (see MCXT_METHOD calls below).
 	 */
-
-	BOGUS_MCTX(MCTX_7_UNUSED_ID),
 	BOGUS_MCTX(MCTX_8_UNUSED_ID),
 	BOGUS_MCTX(MCTX_9_UNUSED_ID),
 	BOGUS_MCTX(MCTX_10_UNUSED_ID),
diff --git a/src/backend/utils/mmgr/meson.build b/src/backend/utils/mmgr/meson.build
index 9dcf990cdc..dd43a6844c 100644
--- a/src/backend/utils/mmgr/meson.build
+++ b/src/backend/utils/mmgr/meson.build
@@ -3,6 +3,7 @@
 backend_sources += files(
   'alignedalloc.c',
   'aset.c',
+  'bump.c',
   'dsa.c',
   'freepage.c',
   'generation.c',
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index edc0257f36..c4c9fd3e3e 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -146,6 +146,7 @@ typedef struct MemoryContextData
 	((context) != NULL && \
 	 (IsA((context), AllocSetContext) || \
 	  IsA((context), SlabContext) || \
-	  IsA((context), GenerationContext)))
+	  IsA((context), GenerationContext) || \
+	  IsA((context), BumpContext)))
 
 #endif							/* MEMNODES_H */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 6e5fa72b0e..4446e14223 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -108,6 +108,13 @@ extern void ProcessLogMemoryContextInterrupt(void);
  * Memory-context-type-specific functions
  */
 
+/* bump.c */
+extern MemoryContext BumpContextCreate(MemoryContext parent,
+									   const char *name,
+									   Size minContextSize,
+									   Size initBlockSize,
+									   Size maxBlockSize);
+
 /* aset.c */
 extern MemoryContext AllocSetContextCreateInternal(MemoryContext parent,
 												   const char *name,
diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h
index c3f010b595..d39392a873 100644
--- a/src/include/utils/memutils_internal.h
+++ b/src/include/utils/memutils_internal.h
@@ -79,6 +79,22 @@ extern void *AlignedAllocRealloc(void *pointer, Size size, int flags);
 extern MemoryContext AlignedAllocGetChunkContext(void *pointer);
 extern Size AlignedAllocGetChunkSpace(void *pointer);
 
+ /* These functions implement the MemoryContext API for the Bump context. */
+extern void *BumpAlloc(MemoryContext context, Size size, int flags);
+extern void BumpFree(void *pointer);
+extern void *BumpRealloc(void *pointer, Size size, int flags);
+extern void BumpReset(MemoryContext context);
+extern void BumpDelete(MemoryContext context);
+extern MemoryContext BumpGetChunkContext(void *pointer);
+extern Size BumpGetChunkSpace(void *pointer);
+extern bool BumpIsEmpty(MemoryContext context);
+extern void BumpStats(MemoryContext context, MemoryStatsPrintFunc printfunc,
+					  void *passthru, MemoryContextCounters *totals,
+					  bool print_to_stderr);
+#ifdef MEMORY_CONTEXT_CHECKING
+extern void BumpCheck(MemoryContext context);
+#endif
+
 /*
  * How many extra bytes do we need to request in order to ensure that we can
  * align a pointer to 'alignto'.  Since palloc'd pointers are already aligned
@@ -111,7 +127,7 @@ typedef enum MemoryContextMethodID
 	MCTX_GENERATION_ID,
 	MCTX_SLAB_ID,
 	MCTX_ALIGNED_REDIRECT_ID,
-	MCTX_7_UNUSED_ID,
+	MCTX_BUMP_ID,
 	MCTX_8_UNUSED_ID,
 	MCTX_9_UNUSED_ID,
 	MCTX_10_UNUSED_ID,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 01845ee71d..4f47218dc6 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -335,6 +335,8 @@ BulkInsertState
 BulkInsertStateData
 BulkWriteBuffer
 BulkWriteState
+BumpBlock
+BumpContext
 CACHESIGN
 CAC_state
 CCFastEqualFN
-- 
2.44.0

From 07dba8c7c0f2fde4a0b2f41303e1197f9b06ba0b Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Sun, 7 Apr 2024 16:16:24 +0700
Subject: [PATCH v84 8/8] DEV compare bump context in tests

---
 src/test/modules/test_tidstore/test_tidstore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c
index e4aad4dabb..77c275ba92 100644
--- a/src/test/modules/test_tidstore/test_tidstore.c
+++ b/src/test/modules/test_tidstore/test_tidstore.c
@@ -117,7 +117,7 @@ test_create(PG_FUNCTION_ARGS)
 	}
 	else
 		/* VACUUM uses insert only, so we test the other option. */
-		tidstore = TidStoreCreateLocal(tidstore_max_size, false);
+		tidstore = TidStoreCreateLocal(tidstore_max_size, true);
 
 	tidstore_empty_size = TidStoreMemoryUsage(tidstore);
 
-- 
2.44.0

From 2e3340824363161243f984a4be0797c81b247751 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrow...@gmail.com>
Date: Sat, 6 Apr 2024 21:58:07 +1300
Subject: [PATCH v84 4/8] Enlarge bit-space for MemoryContextMethodID

Reserve 4 bits for MemoryContextMethodID rather than 3.  3 bits did
technically allow a maximum of 8 memory context types, however, we've
opted to reserve some bit patterns which left us with only 4 slots, all
of which were used.

Here we add another bit which frees up 8 slots for future memory context
types.

In passing, adjust the enum names in MemoryContextMethodID to make it
more clear which ones can be used and which ones are reserved.

Author: Matthias van de Meent, David Rowley
Discussion: https://postgr.es/m/CAApHDvqGSpCU95TmM=Bp=6xjl_nlys4zdzopfnywbk97xrd...@mail.gmail.com
---
 src/backend/utils/mmgr/README            | 21 ++++++++----
 src/backend/utils/mmgr/mcxt.c            | 41 +++++++++++++-----------
 src/include/utils/memutils_internal.h    | 18 ++++++++---
 src/include/utils/memutils_memorychunk.h | 30 +++++++++++++----
 4 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/src/backend/utils/mmgr/README b/src/backend/utils/mmgr/README
index b20b9d4852..f484f7d6f5 100644
--- a/src/backend/utils/mmgr/README
+++ b/src/backend/utils/mmgr/README
@@ -395,14 +395,14 @@ relevant MemoryContext as a parameter, operations like free and
 realloc are trickier.  To make those work, we require all memory
 context types to produce allocated chunks that are immediately,
 without any padding, preceded by a uint64 value of which the least
-significant 3 bits are set to the owning context's MemoryContextMethodID.
+significant 4 bits are set to the owning context's MemoryContextMethodID.
 This allows the code to determine the correct MemoryContextMethods to
-use by looking up the mcxt_methods[] array using the 3 bits as an index
+use by looking up the mcxt_methods[] array using the 4 bits as an index
 into that array.
 
 If a type of allocator needs additional information about its chunks,
 like e.g. the size of the allocation, that information can in turn
-either be encoded into the remaining 61 bits of the preceding uint64 value
+either be encoded into the remaining 60 bits of the preceding uint64 value
 or if more space is required, additional values may be stored directly prior
 to the uint64 value.  It is up to the context implementation to manage this.
 
@@ -420,13 +420,20 @@ pfree(void *pointer)
 
 All of the current memory contexts make use of the MemoryChunk header type
 which is defined in memutils_memorychunk.h.  This suits all of the existing
-context types well as it makes use of the remaining 61-bits of the uint64
+context types well as it makes use of the remaining 60-bits of the uint64
 header to efficiently encode the size of the chunk of memory (or freelist
 index, in the case of aset.c) and the number of bytes which must be subtracted
 from the chunk in order to obtain a reference to the block that the chunk
-belongs to.  30 bits are used for each of these.  If more than 30 bits are
-required then the memory context must manage that itself.  This can be done by
-calling the MemoryChunkSetHdrMaskExternal() function on the given chunk.
+belongs to.  30 bits are used for each of these, but only a total of 59 bits
+as the lowest bit for the chunk to block offset is the same bit as the highest
+bit of the chunk size.  This overlapping is possible as the relative offset
+between the block and the chunk is expected to be a MAXALIGNed value which
+guarantees the lowest bit is always 0.  If more than 30 bits are required for
+each of these fields then the memory context must manage that itself.  This
+can be done by calling the MemoryChunkSetHdrMaskExternal() function on the
+given chunk.  Whether a chunk is an external chunk can be determined by the 1
+remaining bit from the 64-bit MemoryChunk.
+
 Currently, each memory context type stores large allocations on dedicated
 blocks (which always contain only a single chunk).  For these, finding the
 block is simple as we know that the chunk must be the first on the given
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 5d426795d9..52ae120af9 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -37,6 +37,11 @@ static Size BogusGetChunkSpace(void *pointer);
 /*****************************************************************************
  *	  GLOBAL MEMORY															 *
  *****************************************************************************/
+#define BOGUS_MCTX(id) \
+	[id].free_p = BogusFree, \
+	[id].realloc = BogusRealloc, \
+	[id].get_chunk_context = BogusGetChunkContext, \
+	[id].get_chunk_space = BogusGetChunkSpace
 
 static const MemoryContextMethods mcxt_methods[] = {
 	/* aset.c */
@@ -103,27 +108,25 @@ static const MemoryContextMethods mcxt_methods[] = {
 	 * invoked from inspection of a chunk (see MCXT_METHOD calls below).
 	 */
 
-	[MCTX_UNUSED1_ID].free_p = BogusFree,
-	[MCTX_UNUSED1_ID].realloc = BogusRealloc,
-	[MCTX_UNUSED1_ID].get_chunk_context = BogusGetChunkContext,
-	[MCTX_UNUSED1_ID].get_chunk_space = BogusGetChunkSpace,
-
-	[MCTX_UNUSED2_ID].free_p = BogusFree,
-	[MCTX_UNUSED2_ID].realloc = BogusRealloc,
-	[MCTX_UNUSED2_ID].get_chunk_context = BogusGetChunkContext,
-	[MCTX_UNUSED2_ID].get_chunk_space = BogusGetChunkSpace,
-
-	[MCTX_UNUSED3_ID].free_p = BogusFree,
-	[MCTX_UNUSED3_ID].realloc = BogusRealloc,
-	[MCTX_UNUSED3_ID].get_chunk_context = BogusGetChunkContext,
-	[MCTX_UNUSED3_ID].get_chunk_space = BogusGetChunkSpace,
-
-	[MCTX_UNUSED4_ID].free_p = BogusFree,
-	[MCTX_UNUSED4_ID].realloc = BogusRealloc,
-	[MCTX_UNUSED4_ID].get_chunk_context = BogusGetChunkContext,
-	[MCTX_UNUSED4_ID].get_chunk_space = BogusGetChunkSpace,
+	BOGUS_MCTX(MCTX_7_UNUSED_ID),
+	BOGUS_MCTX(MCTX_8_UNUSED_ID),
+	BOGUS_MCTX(MCTX_9_UNUSED_ID),
+	BOGUS_MCTX(MCTX_10_UNUSED_ID),
+	BOGUS_MCTX(MCTX_11_UNUSED_ID),
+	BOGUS_MCTX(MCTX_12_UNUSED_ID),
+	BOGUS_MCTX(MCTX_13_UNUSED_ID),
+	BOGUS_MCTX(MCTX_14_UNUSED_ID),
+
+	/*
+	 * Reserved IDs with bit patterns that we'd see if we were working on
+	 * invalid memory, either uninitialized or wiped.
+	 */
+	BOGUS_MCTX(MCTX_0_RESERVED_UNUSEDMEM_ID),
+	BOGUS_MCTX(MCTX_15_RESERVED_WIPEMEM_ID),
 };
 
+#undef BOGUS_MCTX
+
 /*
  * CurrentMemoryContext
  *		Default memory context for allocations.
diff --git a/src/include/utils/memutils_internal.h b/src/include/utils/memutils_internal.h
index ad1048fd82..c3f010b595 100644
--- a/src/include/utils/memutils_internal.h
+++ b/src/include/utils/memutils_internal.h
@@ -104,21 +104,29 @@ extern Size AlignedAllocGetChunkSpace(void *pointer);
  */
 typedef enum MemoryContextMethodID
 {
-	MCTX_UNUSED1_ID,			/* 000 occurs in never-used memory */
-	MCTX_UNUSED2_ID,			/* glibc malloc'd chunks usually match 001 */
-	MCTX_UNUSED3_ID,			/* glibc malloc'd chunks > 128kB match 010 */
+	MCTX_0_RESERVED_UNUSEDMEM_ID,	/*  0; occurs in never-used memory */
+	MCTX_1_RESERVED_GLIBC_ID,	/*  1=0001; glibc malloc'd chunks */
+	MCTX_2_RESERVED_GLIBC_ID,	/*  2=0010; glibc malloc'd chunks > 128kB */
 	MCTX_ASET_ID,
 	MCTX_GENERATION_ID,
 	MCTX_SLAB_ID,
 	MCTX_ALIGNED_REDIRECT_ID,
-	MCTX_UNUSED4_ID,			/* 111 occurs in wipe_mem'd memory */
+	MCTX_7_UNUSED_ID,
+	MCTX_8_UNUSED_ID,
+	MCTX_9_UNUSED_ID,
+	MCTX_10_UNUSED_ID,
+	MCTX_11_UNUSED_ID,
+	MCTX_12_UNUSED_ID,
+	MCTX_13_UNUSED_ID,
+	MCTX_14_UNUSED_ID,
+	MCTX_15_RESERVED_WIPEMEM_ID	/* 1111 occurs in wipe_mem'd memory (0x7F) */
 } MemoryContextMethodID;
 
 /*
  * The number of bits that 8-byte memory chunk headers can use to encode the
  * MemoryContextMethodID.
  */
-#define MEMORY_CONTEXT_METHODID_BITS 3
+#define MEMORY_CONTEXT_METHODID_BITS 4
 #define MEMORY_CONTEXT_METHODID_MASK \
 	((((uint64) 1) << MEMORY_CONTEXT_METHODID_BITS) - 1)
 
diff --git a/src/include/utils/memutils_memorychunk.h b/src/include/utils/memutils_memorychunk.h
index 38296abe1b..948a5ac954 100644
--- a/src/include/utils/memutils_memorychunk.h
+++ b/src/include/utils/memutils_memorychunk.h
@@ -12,7 +12,7 @@
  * Although MemoryChunks are used by each of our MemoryContexts, future
  * implementations may choose to implement their own method for storing chunk
  * headers.  The only requirement is that the header ends with an 8-byte value
- * which the least significant 3-bits of are set to the MemoryContextMethodID
+ * which the least significant 4-bits of are set to the MemoryContextMethodID
  * of the given context.
  *
  * By default, a MemoryChunk is 8 bytes in size, however, when
@@ -25,7 +25,7 @@
  * used to encode 4 separate pieces of information.  Starting with the least
  * significant bits of 'hdrmask', the bit space is reserved as follows:
  *
- * 1.	3-bits to indicate the MemoryContextMethodID as defined by
+ * 1.	4-bits to indicate the MemoryContextMethodID as defined by
  *		MEMORY_CONTEXT_METHODID_MASK
  * 2.	1-bit to denote an "external" chunk (see below)
  * 3.	30-bits reserved for the MemoryContext to use for anything it
@@ -34,6 +34,14 @@
  * 4.	30-bits for the number of bytes that must be subtracted from the chunk
  *		to obtain the address of the block that the chunk is stored on.
  *
+ * If you're paying close attention, you'll notice this adds up to 65 bits
+ * rather than 64 bits.  This is because the highest order bit of #3 is the
+ * same bit as the lowest order bit of #4.  We can do this as we insist that
+ * the chunk and block pointers are both MAXALIGNed, therefore the relative
+ * offset between those will always be a MAXALIGNed value which means the
+ * lowest order bit is always 0.  When fetching the chunk to block offset
+ * we mask out the lowest-order bit to ensure it's still zero.
+ *
  * In some cases, for example when memory allocations become large, it's
  * possible fields 3 and 4 above are not large enough to store the values
  * required for the chunk.  In this case, the MemoryContext can choose to mark
@@ -93,10 +101,16 @@
  */
 #define MEMORYCHUNK_MAX_BLOCKOFFSET		UINT64CONST(0x3FFFFFFF)
 
+/*
+ * As above, but mask out the lowest-order (always zero) bit as this is shared
+ * with the MemoryChunkGetValue field.
+ */
+#define MEMORYCHUNK_BLOCKOFFSET_MASK 	UINT64CONST(0x3FFFFFFE)
+
 /* define the least significant base-0 bit of each portion of the hdrmask */
 #define MEMORYCHUNK_EXTERNAL_BASEBIT	MEMORY_CONTEXT_METHODID_BITS
 #define MEMORYCHUNK_VALUE_BASEBIT		(MEMORYCHUNK_EXTERNAL_BASEBIT + 1)
-#define MEMORYCHUNK_BLOCKOFFSET_BASEBIT	(MEMORYCHUNK_VALUE_BASEBIT + 30)
+#define MEMORYCHUNK_BLOCKOFFSET_BASEBIT	(MEMORYCHUNK_VALUE_BASEBIT + 29)
 
 /*
  * A magic number for storing in the free bits of an external chunk.  This
@@ -131,11 +145,11 @@ typedef struct MemoryChunk
 	(((hdrmask) >> MEMORYCHUNK_VALUE_BASEBIT) & MEMORYCHUNK_MAX_VALUE)
 
 /*
- * We should have used up all the bits here, so the compiler is likely to
- * optimize out the & MEMORYCHUNK_MAX_BLOCKOFFSET.
+ * We should have used up all the bits here, we just need to ensure we mask
+ * out the low-order bit that's shared with the MemoryChunkGetValue field.
  */
 #define HdrMaskBlockOffset(hdrmask) \
-	(((hdrmask) >> MEMORYCHUNK_BLOCKOFFSET_BASEBIT) & MEMORYCHUNK_MAX_BLOCKOFFSET)
+	(((hdrmask) >> MEMORYCHUNK_BLOCKOFFSET_BASEBIT) & MEMORYCHUNK_BLOCKOFFSET_MASK)
 
 /* For external chunks only, check the magic number matches */
 #define HdrMaskCheckMagic(hdrmask) \
@@ -149,6 +163,7 @@ typedef struct MemoryChunk
  * The number of bytes between 'block' and 'chunk' must be <=
  * MEMORYCHUNK_MAX_BLOCKOFFSET.
  * 'value' must be <= MEMORYCHUNK_MAX_VALUE.
+ * Both 'chunk' and 'block' must be MAXALIGNed pointers.
  */
 static inline void
 MemoryChunkSetHdrMask(MemoryChunk *chunk, void *block,
@@ -157,7 +172,7 @@ MemoryChunkSetHdrMask(MemoryChunk *chunk, void *block,
 	Size		blockoffset = (char *) chunk - (char *) block;
 
 	Assert((char *) chunk >= (char *) block);
-	Assert(blockoffset <= MEMORYCHUNK_MAX_BLOCKOFFSET);
+	Assert((blockoffset & MEMORYCHUNK_BLOCKOFFSET_MASK) == blockoffset);
 	Assert(value <= MEMORYCHUNK_MAX_VALUE);
 	Assert((int) methodid <= MEMORY_CONTEXT_METHODID_MASK);
 
@@ -225,6 +240,7 @@ MemoryChunkGetBlock(MemoryChunk *chunk)
 }
 
 /* cleanup all internal definitions */
+#undef MEMORYCHUNK_BLOCKOFFSET_MASK
 #undef MEMORYCHUNK_EXTERNAL_BASEBIT
 #undef MEMORYCHUNK_VALUE_BASEBIT
 #undef MEMORYCHUNK_BLOCKOFFSET_BASEBIT
-- 
2.44.0

From 660fa32ae29b7d7fd0e14d1d73f025a01ce90386 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Sun, 7 Apr 2024 16:16:01 +0700
Subject: [PATCH v84 7/8] DEV log memory usage in tests

---
 src/test/modules/test_tidstore/expected/test_tidstore.out | 4 ++--
 src/test/modules/test_tidstore/sql/test_tidstore.sql      | 4 ++--
 src/test/modules/test_tidstore/test_tidstore.c            | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out
index c40779859b..aae86579e3 100644
--- a/src/test/modules/test_tidstore/expected/test_tidstore.out
+++ b/src/test/modules/test_tidstore/expected/test_tidstore.out
@@ -54,8 +54,8 @@ SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoff
 -- to the allocated memory it started out with. This is easier
 -- with memory contexts in local memory.
 INSERT INTO hideblocks (blockno)
-SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
-  FROM generate_series(1000, 2000, 1) blk;
+SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63]::int2[])
+  FROM generate_series(1000, 200000, 1) blk;
 -- Zero offset not allowed
 SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
 ERROR:  tuple offset out of range: 0
diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql
index 1f4e4a807e..7b64b0bf58 100644
--- a/src/test/modules/test_tidstore/sql/test_tidstore.sql
+++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql
@@ -37,8 +37,8 @@ SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoff
 -- to the allocated memory it started out with. This is easier
 -- with memory contexts in local memory.
 INSERT INTO hideblocks (blockno)
-SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63,64,200]::int2[])
-  FROM generate_series(1000, 2000, 1) blk;
+SELECT do_set_block_offsets(blk, ARRAY[1,31,32,63]::int2[])
+  FROM generate_series(1000, 200000, 1) blk;
 
 -- Zero offset not allowed
 SELECT do_set_block_offsets(1, ARRAY[0]::int2[]);
diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c
index 0a3a58722d..e4aad4dabb 100644
--- a/src/test/modules/test_tidstore/test_tidstore.c
+++ b/src/test/modules/test_tidstore/test_tidstore.c
@@ -291,6 +291,7 @@ test_is_full(PG_FUNCTION_ARGS)
 	bool		is_full;
 
 	is_full = (TidStoreMemoryUsage(tidstore) > tidstore_empty_size);
+	fprintf(stderr, "TID bytes%zu\n", TidStoreMemoryUsage(tidstore));
 
 	PG_RETURN_BOOL(is_full);
 }
-- 
2.44.0

From a99757d435b5dbe612aadcca1bbb05f9c5d87259 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Sun, 7 Apr 2024 12:27:34 +0700
Subject: [PATCH v84 6/8] Use bump context for vacuum's TID storage

Vacuum does not pfree individual entries, and only frees the entire
storage space when finished with it. This allows using a bump context,
eliminating the chunk header in each leaf allocation. Most leaf
allocations will be 16 or 24 bytes, so that's a significant savings.
TidStoreCreateLocal gets a boolean parameter to hint that the created
store is insert-only.

This requires a separate tree context for iteration, since we free
the iteration state after iteration completes.

Discussion: https://postgr.es/m/CANWCAZac%3DpBePg3rhX8nXkUuaLoiAJJLtmnCfZsPEAS4EtJ%3Dkg%40mail.gmail.com
---
 src/backend/access/common/tidstore.c           | 15 +++++++++++++--
 src/backend/access/heap/vacuumlazy.c           |  4 ++--
 src/include/access/tidstore.h                  |  2 +-
 src/include/lib/radixtree.h                    | 11 ++++++++++-
 src/test/modules/test_tidstore/test_tidstore.c |  3 ++-
 5 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index 5784223b76..78a09b72ca 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -163,7 +163,7 @@ static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
  * by TidStoreMemoryUsage().
  */
 TidStore *
-TidStoreCreateLocal(size_t max_bytes)
+TidStoreCreateLocal(size_t max_bytes, bool insert_only)
 {
 	TidStore   *ts;
 	size_t		initBlockSize = ALLOCSET_DEFAULT_INITSIZE;
@@ -181,11 +181,22 @@ TidStoreCreateLocal(size_t max_bytes)
 		maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;
 
 	/* Create a memory context for the TID storage */
-	ts->rt_context = AllocSetContextCreate(CurrentMemoryContext,
+	if (insert_only)
+	{
+		ts->rt_context = BumpContextCreate(CurrentMemoryContext,
 										   "TID storage",
 										   minContextSize,
 										   initBlockSize,
 										   maxBlockSize);
+	}
+	else
+	{
+		ts->rt_context = AllocSetContextCreate(CurrentMemoryContext,
+											   "TID storage",
+											   minContextSize,
+											   initBlockSize,
+											   maxBlockSize);
+	}
 
 	ts->tree.local = local_ts_create(ts->rt_context);
 
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index c3a9dc1ad6..de109acc89 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -2874,7 +2874,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 	dead_items_info->num_items = 0;
 	vacrel->dead_items_info = dead_items_info;
 
-	vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes);
+	vacrel->dead_items = TidStoreCreateLocal(dead_items_info->max_bytes, true);
 }
 
 /*
@@ -2910,7 +2910,7 @@ dead_items_reset(LVRelState *vacrel)
 
 	/* Recreate the tidstore with the same max_bytes limitation */
 	TidStoreDestroy(dead_items);
-	vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes);
+	vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes, true);
 
 	/* Reset the counter */
 	vacrel->dead_items_info->num_items = 0;
diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h
index f05d748783..32aa999519 100644
--- a/src/include/access/tidstore.h
+++ b/src/include/access/tidstore.h
@@ -29,7 +29,7 @@ typedef struct TidStoreIterResult
 	OffsetNumber *offsets;
 } TidStoreIterResult;
 
-extern TidStore *TidStoreCreateLocal(size_t max_bytes);
+extern TidStore *TidStoreCreateLocal(size_t max_bytes, bool insert_only);
 extern TidStore *TidStoreCreateShared(size_t max_bytes, int tranche_id);
 extern TidStore *TidStoreAttach(dsa_handle area_handle, dsa_pointer handle);
 extern void TidStoreDetach(TidStore *ts);
diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
index a9a2c90db2..6291b360e2 100644
--- a/src/include/lib/radixtree.h
+++ b/src/include/lib/radixtree.h
@@ -713,6 +713,7 @@ struct RT_RADIX_TREE
 	/* leaf_context is used only for single-value leaves */
 	MemoryContextData *leaf_context;
 #endif
+	MemoryContextData *iter_context;
 };
 
 /*
@@ -1827,6 +1828,14 @@ RT_CREATE(MemoryContext ctx)
 	tree = (RT_RADIX_TREE *) palloc0(sizeof(RT_RADIX_TREE));
 	tree->context = ctx;
 
+	/*
+	 * Separate context for iteration in case the tree context doesn't support
+	 * pfree
+	 */
+	tree->iter_context = AllocSetContextCreate(ctx,
+											   RT_STR(RT_PREFIX) "radix tree iteration",
+											   ALLOCSET_SMALL_SIZES);
+
 #ifdef RT_SHMEM
 	tree->dsa = dsa;
 	dp = dsa_allocate0(dsa, sizeof(RT_RADIX_TREE_CONTROL));
@@ -2069,7 +2078,7 @@ RT_BEGIN_ITERATE(RT_RADIX_TREE * tree)
 	RT_ITER    *iter;
 	RT_CHILD_PTR root;
 
-	iter = (RT_ITER *) MemoryContextAllocZero(tree->context,
+	iter = (RT_ITER *) MemoryContextAllocZero(tree->iter_context,
 											  sizeof(RT_ITER));
 	iter->tree = tree;
 
diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c
index 32c6c477b7..0a3a58722d 100644
--- a/src/test/modules/test_tidstore/test_tidstore.c
+++ b/src/test/modules/test_tidstore/test_tidstore.c
@@ -116,7 +116,8 @@ test_create(PG_FUNCTION_ARGS)
 		dsa_pin_mapping(TidStoreGetDSA(tidstore));
 	}
 	else
-		tidstore = TidStoreCreateLocal(tidstore_max_size);
+		/* VACUUM uses insert only, so we test the other option. */
+		tidstore = TidStoreCreateLocal(tidstore_max_size, false);
 
 	tidstore_empty_size = TidStoreMemoryUsage(tidstore);
 
-- 
2.44.0

From 8547efd90cb438a2c9624a5dfbeef945e1ca97ac Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 5 Apr 2024 17:45:59 +0700
Subject: [PATCH v84 3/8] Teach radix tree to embed values at runtime

Previously, the decision to store values in leaves or within the child
pointer was made at compile time, with variable length values using
leaves by necessity. This commit allows introspecting the length of
variable length values at runtime for that decision. This requires
the ability to tell whether the last-level child pointer is actually
a value, so we use a pointer tag in the lowest level bit.

Use this in TID store. This entails adding a byte to the header to
reserve space for the tag. Commit XXXXXXXXX stores up to three offsets
within the header with no bitmap, and now the header can be embedded
as above. This reduces worst-case memory usage when TIDs are sparse.

Discussion: https://postgr.es/m/
---
 src/backend/access/common/tidstore.c | 79 ++++++++++++++++++++--------
 src/include/lib/radixtree.h          | 32 +++++++++++
 2 files changed, 88 insertions(+), 23 deletions(-)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index 26eb52948b..5784223b76 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -43,19 +43,50 @@
  */
 typedef struct BlocktableEntry
 {
-	uint16		nwords;
-
-	/*
-	 * We can store a small number of offsets here to avoid wasting space with
-	 * a sparse bitmap.
-	 */
-	OffsetNumber full_offsets[NUM_FULL_OFFSETS];
+	union
+	{
+		struct
+		{
+#ifndef WORDS_BIGENDIAN
+			/*
+			 * We need to position this member so that the backing radix tree
+			 * can use the lowest bit for a pointer tag. In particular, it
+			 * must be placed within 'header' so that it corresponds to the
+			 * lowest byte in 'ptr'. We position 'nwords' along with it to
+			 * avoid struct padding.
+			 */
+			uint8		flags;
+
+			int8		nwords;
+#endif
+
+			/*
+			 * We can store a small number of offsets here to avoid wasting
+			 * space with a sparse bitmap.
+			 */
+			OffsetNumber full_offsets[NUM_FULL_OFFSETS];
+
+#ifdef WORDS_BIGENDIAN
+			int8		nwords;
+			uint8		flags;
+#endif
+		};
+		uintptr_t	ptr;
+	}			header;
 
 	bitmapword	words[FLEXIBLE_ARRAY_MEMBER];
 } BlocktableEntry;
+
+/*
+ * The type of 'nwords' limits the max number of words in the 'words' array.
+ * This computes the max offset we can actually store in the bitmap. In
+ * practice, it's almost always the same as MaxOffsetNumber.
+ */
+#define MAX_OFFSET_IN_BITMAP Min(BITS_PER_BITMAPWORD * PG_INT8_MAX - 1, MaxOffsetNumber)
+
 #define MaxBlocktableEntrySize \
 	offsetof(BlocktableEntry, words) + \
-		(sizeof(bitmapword) * WORDS_PER_PAGE(MaxOffsetNumber))
+		(sizeof(bitmapword) * WORDS_PER_PAGE(MAX_OFFSET_IN_BITMAP))
 
 #define RT_PREFIX local_ts
 #define RT_SCOPE static
@@ -64,7 +95,8 @@ typedef struct BlocktableEntry
 #define RT_VALUE_TYPE BlocktableEntry
 #define RT_VARLEN_VALUE_SIZE(page) \
 	(offsetof(BlocktableEntry, words) + \
-	sizeof(bitmapword) * (page)->nwords)
+	sizeof(bitmapword) * (page)->header.nwords)
+#define RT_RUNTIME_EMBEDDABLE_VALUE
 #include "lib/radixtree.h"
 
 #define RT_PREFIX shared_ts
@@ -75,7 +107,8 @@ typedef struct BlocktableEntry
 #define RT_VALUE_TYPE BlocktableEntry
 #define RT_VARLEN_VALUE_SIZE(page) \
 	(offsetof(BlocktableEntry, words) + \
-	sizeof(bitmapword) * (page)->nwords)
+	sizeof(bitmapword) * (page)->header.nwords)
+#define RT_RUNTIME_EMBEDDABLE_VALUE
 #include "lib/radixtree.h"
 
 /* Per-backend state for a TidStore */
@@ -335,13 +368,13 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 			OffsetNumber off = offsets[i];
 
 			/* safety check to ensure we don't overrun bit array bounds */
-			if (!OffsetNumberIsValid(off))
+			if (off == InvalidOffsetNumber || off > MAX_OFFSET_IN_BITMAP)
 				elog(ERROR, "tuple offset out of range: %u", off);
 
-			page->full_offsets[i] = off;
+			page->header.full_offsets[i] = off;
 		}
 
-		page->nwords = 0;
+		page->header.nwords = 0;
 	}
 	else
 	{
@@ -356,7 +389,7 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 				OffsetNumber off = offsets[idx];
 
 				/* safety check to ensure we don't overrun bit array bounds */
-				if (!OffsetNumberIsValid(off))
+				if (off == InvalidOffsetNumber || off > MAX_OFFSET_IN_BITMAP)
 					elog(ERROR, "tuple offset out of range: %u", off);
 
 				if (off >= next_word_threshold)
@@ -370,8 +403,8 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 			page->words[wordnum] = word;
 		}
 
-		page->nwords = wordnum;
-		Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
+		page->header.nwords = wordnum;
+		Assert(page->header.nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
 	}
 
 	if (TidStoreIsShared(ts))
@@ -399,12 +432,12 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 	if (page == NULL)
 		return false;
 
-	if (page->nwords == 0)
+	if (page->header.nwords == 0)
 	{
 		/* we have offsets in the header */
 		for (int i = 0; i < NUM_FULL_OFFSETS; i++)
 		{
-			if (page->full_offsets[i] == off)
+			if (page->header.full_offsets[i] == off)
 				return true;
 		}
 		return false;
@@ -415,7 +448,7 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 		bitnum = BITNUM(off);
 
 		/* no bitmap for the off */
-		if (wordnum >= page->nwords)
+		if (wordnum >= page->header.nwords)
 			return false;
 
 		return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
@@ -539,18 +572,18 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
 	result->num_offsets = 0;
 	result->blkno = blkno;
 
-	if (page->nwords == 0)
+	if (page->header.nwords == 0)
 	{
 		/* we have offsets in the header */
 		for (int i = 0; i < NUM_FULL_OFFSETS; i++)
 		{
-			if (page->full_offsets[i] != InvalidOffsetNumber)
-				result->offsets[result->num_offsets++] = page->full_offsets[i];
+			if (page->header.full_offsets[i] != InvalidOffsetNumber)
+				result->offsets[result->num_offsets++] = page->header.full_offsets[i];
 		}
 	}
 	else
 	{
-		for (wordnum = 0; wordnum < page->nwords; wordnum++)
+		for (wordnum = 0; wordnum < page->header.nwords; wordnum++)
 		{
 			bitmapword	w = page->words[wordnum];
 			int			off = wordnum * BITS_PER_BITMAPWORD;
diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
index 6f36e8bfde..a9a2c90db2 100644
--- a/src/include/lib/radixtree.h
+++ b/src/include/lib/radixtree.h
@@ -105,6 +105,10 @@
  *   involving a pointer to the value type, to calculate size.
  *     NOTE: implies that the value is in fact variable-length,
  *     so do not set for fixed-length values.
+ * - RT_RUNTIME_EMBEDDABLE_VALUE - for variable length values, allows
+ *   storing the value in a child pointer slot, rather than as a single-
+ *   value leaf, if small enough. This requires that the value, when
+ *   read as a child pointer, can be tagged in the lowest bit.
  *
  * Optional parameters:
  * - RT_SHMEM - if defined, the radix tree is created in the DSA area
@@ -437,7 +441,13 @@ static inline bool
 RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
 {
 #ifdef RT_VARLEN_VALUE_SIZE
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
+#else
 	return false;
+#endif
+
 #else
 	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
 #endif
@@ -451,7 +461,19 @@ static inline bool
 RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
 {
 #ifdef RT_VARLEN_VALUE_SIZE
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+	/* check for pointer tag */
+#ifdef RT_SHMEM
+	return child & 1;
+#else
+	return ((uintptr_t) child) & 1;
+#endif
+
+#else
 	return false;
+#endif
+
 #else
 	return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
 #endif
@@ -1728,6 +1750,15 @@ have_slot:
 	{
 		/* store value directly in child pointer slot */
 		memcpy(slot, value_p, value_sz);
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+		/* tag child pointer */
+#ifdef RT_SHMEM
+		*slot |= 1;
+#else
+		*((uintptr_t *) slot) |= 1;
+#endif
+#endif
 	}
 	else
 	{
@@ -2879,6 +2910,7 @@ RT_DUMP_NODE(RT_NODE * node)
 #undef RT_DEFINE
 #undef RT_VALUE_TYPE
 #undef RT_VARLEN_VALUE_SIZE
+#undef RT_RUNTIME_EMBEDDABLE_VALUE
 #undef RT_SHMEM
 #undef RT_USE_DELETE
 #undef RT_DEBUG
-- 
2.44.0

From 89d5abe4f0b945d07645ac7ead252c8aafe09331 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 5 Apr 2024 17:06:55 +0700
Subject: [PATCH v84 2/8] pgindent

---
 src/backend/access/common/tidstore.c | 99 ++++++++++++++--------------
 1 file changed, 51 insertions(+), 48 deletions(-)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index 4eb5d46951..26eb52948b 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -45,7 +45,10 @@ typedef struct BlocktableEntry
 {
 	uint16		nwords;
 
-	/* We can store a small number of offsets here to avoid wasting space with a sparse bitmap. */
+	/*
+	 * We can store a small number of offsets here to avoid wasting space with
+	 * a sparse bitmap.
+	 */
 	OffsetNumber full_offsets[NUM_FULL_OFFSETS];
 
 	bitmapword	words[FLEXIBLE_ARRAY_MEMBER];
@@ -342,35 +345,35 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 	}
 	else
 	{
-	for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
-		 wordnum <= WORDNUM(offsets[num_offsets - 1]);
-		 wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
-	{
-		word = 0;
-
-		while (idx < num_offsets)
+		for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
+			 wordnum <= WORDNUM(offsets[num_offsets - 1]);
+			 wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
 		{
-			OffsetNumber off = offsets[idx];
+			word = 0;
 
-			/* safety check to ensure we don't overrun bit array bounds */
-			if (!OffsetNumberIsValid(off))
-				elog(ERROR, "tuple offset out of range: %u", off);
+			while (idx < num_offsets)
+			{
+				OffsetNumber off = offsets[idx];
+
+				/* safety check to ensure we don't overrun bit array bounds */
+				if (!OffsetNumberIsValid(off))
+					elog(ERROR, "tuple offset out of range: %u", off);
 
-			if (off >= next_word_threshold)
-				break;
+				if (off >= next_word_threshold)
+					break;
 
-			word |= ((bitmapword) 1 << BITNUM(off));
-			idx++;
+				word |= ((bitmapword) 1 << BITNUM(off));
+				idx++;
+			}
+
+			/* write out offset bitmap for this wordnum */
+			page->words[wordnum] = word;
 		}
 
-		/* write out offset bitmap for this wordnum */
-		page->words[wordnum] = word;
+		page->nwords = wordnum;
+		Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
 	}
 
-	page->nwords = wordnum;
-	Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
-}
-
 	if (TidStoreIsShared(ts))
 		shared_ts_set(ts->tree.shared, blkno, page);
 	else
@@ -408,15 +411,15 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 	}
 	else
 	{
-	wordnum = WORDNUM(off);
-	bitnum = BITNUM(off);
+		wordnum = WORDNUM(off);
+		bitnum = BITNUM(off);
 
-	/* no bitmap for the off */
-	if (wordnum >= page->nwords)
-		return false;
+		/* no bitmap for the off */
+		if (wordnum >= page->nwords)
+			return false;
 
-	return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
-}
+		return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
+	}
 }
 
 /*
@@ -547,26 +550,26 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
 	}
 	else
 	{
-	for (wordnum = 0; wordnum < page->nwords; wordnum++)
-	{
-		bitmapword	w = page->words[wordnum];
-		int			off = wordnum * BITS_PER_BITMAPWORD;
-
-		/* Make sure there is enough space to add offsets */
-		if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
+		for (wordnum = 0; wordnum < page->nwords; wordnum++)
 		{
-			result->max_offset *= 2;
-			result->offsets = repalloc(result->offsets,
-									   sizeof(OffsetNumber) * result->max_offset);
-		}
-
-		while (w != 0)
-		{
-			if (w & 1)
-				result->offsets[result->num_offsets++] = (OffsetNumber) off;
-			off++;
-			w >>= 1;
+			bitmapword	w = page->words[wordnum];
+			int			off = wordnum * BITS_PER_BITMAPWORD;
+
+			/* Make sure there is enough space to add offsets */
+			if ((result->num_offsets + BITS_PER_BITMAPWORD) > result->max_offset)
+			{
+				result->max_offset *= 2;
+				result->offsets = repalloc(result->offsets,
+										   sizeof(OffsetNumber) * result->max_offset);
+			}
+
+			while (w != 0)
+			{
+				if (w & 1)
+					result->offsets[result->num_offsets++] = (OffsetNumber) off;
+				off++;
+				w >>= 1;
+			}
 		}
 	}
 }
-}
-- 
2.44.0

From 24bd672deb4a6fa14abfc8583b500d1cbc332032 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 5 Apr 2024 17:04:12 +0700
Subject: [PATCH v84 1/8] store offsets in the header

---
 src/backend/access/common/tidstore.c          | 52 +++++++++++++++++++
 .../test_tidstore/expected/test_tidstore.out  | 14 +++++
 .../test_tidstore/sql/test_tidstore.sql       |  5 ++
 3 files changed, 71 insertions(+)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index e1a7e82469..4eb5d46951 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -34,6 +34,9 @@
 /* number of active words for a page: */
 #define WORDS_PER_PAGE(n) ((n) / BITS_PER_BITMAPWORD + 1)
 
+/* number of offsets we can store in the header of a BlocktableEntry */
+#define NUM_FULL_OFFSETS ((sizeof(uintptr_t) - sizeof(uint16)) / sizeof(OffsetNumber))
+
 /*
  * This is named similarly to PagetableEntry in tidbitmap.c
  * because the two have a similar function.
@@ -41,6 +44,10 @@
 typedef struct BlocktableEntry
 {
 	uint16		nwords;
+
+	/* We can store a small number of offsets here to avoid wasting space with a sparse bitmap. */
+	OffsetNumber full_offsets[NUM_FULL_OFFSETS];
+
 	bitmapword	words[FLEXIBLE_ARRAY_MEMBER];
 } BlocktableEntry;
 #define MaxBlocktableEntrySize \
@@ -316,6 +323,25 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 	for (int i = 1; i < num_offsets; i++)
 		Assert(offsets[i] > offsets[i - 1]);
 
+	memset(page, 0, offsetof(BlocktableEntry, words));
+
+	if (num_offsets <= NUM_FULL_OFFSETS)
+	{
+		for (int i = 0; i < num_offsets; i++)
+		{
+			OffsetNumber off = offsets[i];
+
+			/* safety check to ensure we don't overrun bit array bounds */
+			if (!OffsetNumberIsValid(off))
+				elog(ERROR, "tuple offset out of range: %u", off);
+
+			page->full_offsets[i] = off;
+		}
+
+		page->nwords = 0;
+	}
+	else
+	{
 	for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
 		 wordnum <= WORDNUM(offsets[num_offsets - 1]);
 		 wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
@@ -343,6 +369,7 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 
 	page->nwords = wordnum;
 	Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
+}
 
 	if (TidStoreIsShared(ts))
 		shared_ts_set(ts->tree.shared, blkno, page);
@@ -369,6 +396,18 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 	if (page == NULL)
 		return false;
 
+	if (page->nwords == 0)
+	{
+		/* we have offsets in the header */
+		for (int i = 0; i < NUM_FULL_OFFSETS; i++)
+		{
+			if (page->full_offsets[i] == off)
+				return true;
+		}
+		return false;
+	}
+	else
+	{
 	wordnum = WORDNUM(off);
 	bitnum = BITNUM(off);
 
@@ -378,6 +417,7 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 
 	return (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
 }
+}
 
 /*
  * Prepare to iterate through a TidStore.
@@ -496,6 +536,17 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
 	result->num_offsets = 0;
 	result->blkno = blkno;
 
+	if (page->nwords == 0)
+	{
+		/* we have offsets in the header */
+		for (int i = 0; i < NUM_FULL_OFFSETS; i++)
+		{
+			if (page->full_offsets[i] != InvalidOffsetNumber)
+				result->offsets[result->num_offsets++] = page->full_offsets[i];
+		}
+	}
+	else
+	{
 	for (wordnum = 0; wordnum < page->nwords; wordnum++)
 	{
 		bitmapword	w = page->words[wordnum];
@@ -518,3 +569,4 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
 		}
 	}
 }
+}
diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out
index 0ae2f970da..c40779859b 100644
--- a/src/test/modules/test_tidstore/expected/test_tidstore.out
+++ b/src/test/modules/test_tidstore/expected/test_tidstore.out
@@ -36,6 +36,20 @@ SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
     (VALUES (0), (1), (:maxblkno / 2), (:maxblkno - 1), (:maxblkno)) AS blocks(blk),
     (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
   GROUP BY blk;
+-- Test offsets embedded in the bitmap header.
+SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
+ do_set_block_offsets 
+----------------------
+                  501
+(1 row)
+
+SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
+  FROM generate_series(1, 3);
+ do_set_block_offsets 
+----------------------
+                  502
+(1 row)
+
 -- Add enough TIDs to cause the store to appear "full", compared
 -- to the allocated memory it started out with. This is easier
 -- with memory contexts in local memory.
diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql
index e5edfbb264..1f4e4a807e 100644
--- a/src/test/modules/test_tidstore/sql/test_tidstore.sql
+++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql
@@ -28,6 +28,11 @@ SELECT do_set_block_offsets(blk, array_agg(off)::int2[])
     (VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)) AS offsets(off)
   GROUP BY blk;
 
+-- Test offsets embedded in the bitmap header.
+SELECT do_set_block_offsets(501, array[greatest((random() * :maxoffset)::int, 1)]::int2[]);
+SELECT do_set_block_offsets(502, array_agg(DISTINCT greatest((random() * :maxoffset)::int, 1))::int2[])
+  FROM generate_series(1, 3);
+
 -- Add enough TIDs to cause the store to appear "full", compared
 -- to the allocated memory it started out with. This is easier
 -- with memory contexts in local memory.
-- 
2.44.0

Reply via email to