Hi,

Attached is v9 of this patch series. This addresses most of the points raised in the review, namely:

1) changed most 'debug' stuff to 'static inline' in memdebug.h

2) fixed and reworded a bunch of comments

3) got rid of the block-level bitmap tracking free chunks

Instead of the bitmap, I've used a simple singly-linked list of free chunks, storing int32 chunk indexes in the free chunks themselves. Perhaps it could use slist instead, but I'm not quite sure MAXALIGN is guaranteed to be at least the size of a pointer.
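
To illustrate the idea (just a simplified standalone sketch, not the actual
patch code - names like DemoBlock are made up), every free chunk stores the
int32 index of the next free chunk, so the block header only needs the index
of the first one:

    #include <stdint.h>
    #include <stddef.h>
    #include <string.h>

    typedef struct DemoBlock
    {
        int32_t     firstFree;  /* index of first free chunk, or nchunks */
        int32_t     nchunks;    /* number of chunks on the block */
        size_t      chunkSize;  /* chunk size (>= sizeof(int32_t)) */
        char        data[];     /* nchunks * chunkSize bytes */
    } DemoBlock;

    /* link all chunks into the freelist: chunk i points to chunk i+1 */
    static void
    demo_init(DemoBlock *block)
    {
        int32_t     i;

        block->firstFree = 0;
        for (i = 0; i < block->nchunks; i++)
        {
            int32_t     next = i + 1;

            memcpy(block->data + i * block->chunkSize, &next, sizeof(int32_t));
        }
    }

    static void *
    demo_alloc(DemoBlock *block)
    {
        char       *chunk;

        if (block->firstFree >= block->nchunks)
            return NULL;        /* no free chunk on this block */

        chunk = block->data + block->firstFree * block->chunkSize;
        memcpy(&block->firstFree, chunk, sizeof(int32_t)); /* pop the head */

        return chunk;
    }

    static void
    demo_free(DemoBlock *block, void *ptr)
    {
        int32_t     idx = ((char *) ptr - block->data) / block->chunkSize;

        memcpy(ptr, &block->firstFree, sizeof(int32_t));   /* push new head */
        block->firstFree = idx;
    }

The real code additionally keeps the per-context freelists (indexed by the
number of free chunks in a block) and the minFreeChunks cache on top of this.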

In any case, this seems to be working reasonably well - it saves a bit of code (though it also makes some of the code slightly more complex). It also seems to be a tad faster than v8 - repeating the same benchmark as before, I get these numbers:

                master    slab-v8    slab-v9
    -----------------------------------------
      10000         50         28         25
      50000      17500        180        160
     100000     150000        380        330
     200000          ?        750        670

Although the results are quite noisy.
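
For reference, this is how the reorderbuffer creates the new contexts in 0002
(0003 then switches the tuple buffers over to the generational context):

    buffer->change_context = SlabContextCreate(new_ctx,
                                               "Change",
                                               SLAB_DEFAULT_BLOCK_SIZE,
                                               sizeof(ReorderBufferChange));

    buffer->txn_context = SlabContextCreate(new_ctx,
                                            "TXN",
                                            SLAB_DEFAULT_BLOCK_SIZE,
                                            sizeof(ReorderBufferTXN));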

regards

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 7c70a7bef4029dd7f10c7dc9ff0dd92a7bd2f966 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 20 Feb 2017 20:16:16 +0100
Subject: [PATCH 1/3] move common bits to memdebug

---
 src/backend/utils/mmgr/Makefile   |   2 +-
 src/backend/utils/mmgr/aset.c     | 115 +-------------------------------------
 src/backend/utils/mmgr/memdebug.c |  93 ++++++++++++++++++++++++++++++
 src/include/utils/memdebug.h      |  48 ++++++++++++++++
 4 files changed, 144 insertions(+), 114 deletions(-)
 create mode 100644 src/backend/utils/mmgr/memdebug.c

diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index 1842bae..fc5f793 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o portalmem.o
+OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/aset.c b/src/backend/utils/mmgr/aset.c
index 4dfc3ec..33b4d01 100644
--- a/src/backend/utils/mmgr/aset.c
+++ b/src/backend/utils/mmgr/aset.c
@@ -41,46 +41,6 @@
  *	chunks as chunks.  Anything "large" is passed off to malloc().  Change
  *	the number of freelists to change the small/large boundary.
  *
- *
- *	About CLOBBER_FREED_MEMORY:
- *
- *	If this symbol is defined, all freed memory is overwritten with 0x7F's.
- *	This is useful for catching places that reference already-freed memory.
- *
- *	About MEMORY_CONTEXT_CHECKING:
- *
- *	Since we usually round request sizes up to the next power of 2, there
- *	is often some unused space immediately after a requested data area.
- *	Thus, if someone makes the common error of writing past what they've
- *	requested, the problem is likely to go unnoticed ... until the day when
- *	there *isn't* any wasted space, perhaps because of different memory
- *	alignment on a new platform, or some other effect.  To catch this sort
- *	of problem, the MEMORY_CONTEXT_CHECKING option stores 0x7E just beyond
- *	the requested space whenever the request is less than the actual chunk
- *	size, and verifies that the byte is undamaged when the chunk is freed.
- *
- *
- *	About USE_VALGRIND and Valgrind client requests:
- *
- *	Valgrind provides "client request" macros that exchange information with
- *	the host Valgrind (if any).  Under !USE_VALGRIND, memdebug.h stubs out
- *	currently-used macros.
- *
- *	When running under Valgrind, we want a NOACCESS memory region both before
- *	and after the allocation.  The chunk header is tempting as the preceding
- *	region, but mcxt.c expects to able to examine the standard chunk header
- *	fields.  Therefore, we use, when available, the requested_size field and
- *	any subsequent padding.  requested_size is made NOACCESS before returning
- *	a chunk pointer to a caller.  However, to reduce client request traffic,
- *	it is kept DEFINED in chunks on the free list.
- *
- *	The rounded-up capacity of the chunk usually acts as a post-allocation
- *	NOACCESS region.  If the request consumes precisely the entire chunk,
- *	there is no such region; another chunk header may immediately follow.  In
- *	that case, Valgrind will not detect access beyond the end of the chunk.
- *
- *	See also the cooperating Valgrind client requests in mcxt.c.
- *
  *-------------------------------------------------------------------------
  */
 
@@ -296,10 +256,10 @@ static const unsigned char LogTable256[256] =
  */
 #ifdef HAVE_ALLOCINFO
 #define AllocFreeInfo(_cxt, _chunk) \
-			fprintf(stderr, "AllocFree: %s: %p, %d\n", \
+			fprintf(stderr, "AllocFree: %s: %p, %lu\n", \
 				(_cxt)->header.name, (_chunk), (_chunk)->size)
 #define AllocAllocInfo(_cxt, _chunk) \
-			fprintf(stderr, "AllocAlloc: %s: %p, %d\n", \
+			fprintf(stderr, "AllocAlloc: %s: %p, %lu\n", \
 				(_cxt)->header.name, (_chunk), (_chunk)->size)
 #else
 #define AllocFreeInfo(_cxt, _chunk)
@@ -345,77 +305,6 @@ AllocSetFreeIndex(Size size)
 	return idx;
 }
 
-#ifdef CLOBBER_FREED_MEMORY
-
-/* Wipe freed memory for debugging purposes */
-static void
-wipe_mem(void *ptr, size_t size)
-{
-	VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
-	memset(ptr, 0x7F, size);
-	VALGRIND_MAKE_MEM_NOACCESS(ptr, size);
-}
-#endif
-
-#ifdef MEMORY_CONTEXT_CHECKING
-static void
-set_sentinel(void *base, Size offset)
-{
-	char	   *ptr = (char *) base + offset;
-
-	VALGRIND_MAKE_MEM_UNDEFINED(ptr, 1);
-	*ptr = 0x7E;
-	VALGRIND_MAKE_MEM_NOACCESS(ptr, 1);
-}
-
-static bool
-sentinel_ok(const void *base, Size offset)
-{
-	const char *ptr = (const char *) base + offset;
-	bool		ret;
-
-	VALGRIND_MAKE_MEM_DEFINED(ptr, 1);
-	ret = *ptr == 0x7E;
-	VALGRIND_MAKE_MEM_NOACCESS(ptr, 1);
-
-	return ret;
-}
-#endif
-
-#ifdef RANDOMIZE_ALLOCATED_MEMORY
-
-/*
- * Fill a just-allocated piece of memory with "random" data.  It's not really
- * very random, just a repeating sequence with a length that's prime.  What
- * we mainly want out of it is to have a good probability that two palloc's
- * of the same number of bytes start out containing different data.
- *
- * The region may be NOACCESS, so make it UNDEFINED first to avoid errors as
- * we fill it.  Filling the region makes it DEFINED, so make it UNDEFINED
- * again afterward.  Whether to finally make it UNDEFINED or NOACCESS is
- * fairly arbitrary.  UNDEFINED is more convenient for AllocSetRealloc(), and
- * other callers have no preference.
- */
-static void
-randomize_mem(char *ptr, size_t size)
-{
-	static int	save_ctr = 1;
-	size_t		remaining = size;
-	int			ctr;
-
-	ctr = save_ctr;
-	VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
-	while (remaining-- > 0)
-	{
-		*ptr++ = ctr;
-		if (++ctr > 251)
-			ctr = 1;
-	}
-	VALGRIND_MAKE_MEM_UNDEFINED(ptr - size, size);
-	save_ctr = ctr;
-}
-#endif   /* RANDOMIZE_ALLOCATED_MEMORY */
-
 
 /*
  * Public routines
diff --git a/src/backend/utils/mmgr/memdebug.c b/src/backend/utils/mmgr/memdebug.c
new file mode 100644
index 0000000..5f603d2
--- /dev/null
+++ b/src/backend/utils/mmgr/memdebug.c
@@ -0,0 +1,93 @@
+/*-------------------------------------------------------------------------
+ *
+ * memdebug.c
+ *	  Declarations used in memory context implementations, not part of the
+ *	  public API of the memory management subsystem.
+ *
+ *
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/utils/mmgr/memdebug.c
+ *
+ *
+ *	About CLOBBER_FREED_MEMORY:
+ *
+ *	If this symbol is defined, all freed memory is overwritten with 0x7F's.
+ *	This is useful for catching places that reference already-freed memory.
+ *
+ *	About MEMORY_CONTEXT_CHECKING:
+ *
+ *	Since we usually round request sizes up to the next power of 2, there
+ *	is often some unused space immediately after a requested data area.
+ *	Thus, if someone makes the common error of writing past what they've
+ *	requested, the problem is likely to go unnoticed ... until the day when
+ *	there *isn't* any wasted space, perhaps because of different memory
+ *	alignment on a new platform, or some other effect.  To catch this sort
+ *	of problem, the MEMORY_CONTEXT_CHECKING option stores 0x7E just beyond
+ *	the requested space whenever the request is less than the actual chunk
+ *	size, and verifies that the byte is undamaged when the chunk is freed.
+ *
+ *
+ *	About USE_VALGRIND and Valgrind client requests:
+ *
+ *	Valgrind provides "client request" macros that exchange information with
+ *	the host Valgrind (if any).  Under !USE_VALGRIND, memdebug.h stubs out
+ *	currently-used macros.
+ *
+ *	When running under Valgrind, we want a NOACCESS memory region both before
+ *	and after the allocation.  The chunk header is tempting as the preceding
+ *	region, but mcxt.c expects to be able to examine the standard chunk header
+ *	fields.  Therefore, we use, when available, the requested_size field and
+ *	any subsequent padding.  requested_size is made NOACCESS before returning
+ *	a chunk pointer to a caller.  However, to reduce client request traffic,
+ *	it is kept DEFINED in chunks on the free list.
+ *
+ *	The rounded-up capacity of the chunk usually acts as a post-allocation
+ *	NOACCESS region.  If the request consumes precisely the entire chunk,
+ *	there is no such region; another chunk header may immediately follow.  In
+ *	that case, Valgrind will not detect access beyond the end of the chunk.
+ *
+ *	See also the cooperating Valgrind client requests in mcxt.c.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+
+/*
+ * Fill a just-allocated piece of memory with "random" data.  It's not really
+ * very random, just a repeating sequence with a length that's prime.  What
+ * we mainly want out of it is to have a good probability that two palloc's
+ * of the same number of bytes start out containing different data.
+ *
+ * The region may be NOACCESS, so make it UNDEFINED first to avoid errors as
+ * we fill it.  Filling the region makes it DEFINED, so make it UNDEFINED
+ * again afterward.  Whether to finally make it UNDEFINED or NOACCESS is
+ * fairly arbitrary.  UNDEFINED is more convenient for AllocSetRealloc(), and
+ * other callers have no preference.
+ */
+void
+randomize_mem(char *ptr, size_t size)
+{
+	static int	save_ctr = 1;
+	size_t		remaining = size;
+	int			ctr;
+
+	ctr = save_ctr;
+	VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+	while (remaining-- > 0)
+	{
+		*ptr++ = ctr;
+		if (++ctr > 251)
+			ctr = 1;
+	}
+	VALGRIND_MAKE_MEM_UNDEFINED(ptr - size, size);
+	save_ctr = ctr;
+}
+
+#endif   /* RANDOMIZE_ALLOCATED_MEMORY */
diff --git a/src/include/utils/memdebug.h b/src/include/utils/memdebug.h
index 90eb926..fc382c3 100644
--- a/src/include/utils/memdebug.h
+++ b/src/include/utils/memdebug.h
@@ -31,4 +31,52 @@
 #define VALGRIND_MEMPOOL_CHANGE(context, optr, nptr, size)	do {} while (0)
 #endif
 
+
+#ifdef CLOBBER_FREED_MEMORY
+
+/* Wipe freed memory for debugging purposes */
+static inline void
+wipe_mem(void *ptr, size_t size)
+{
+	VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+	memset(ptr, 0x7F, size);
+	VALGRIND_MAKE_MEM_NOACCESS(ptr, size);
+}
+
+#endif	/* CLOBBER_FREED_MEMORY */
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+static inline void
+set_sentinel(void *base, Size offset)
+{
+	char	   *ptr = (char *) base + offset;
+
+	VALGRIND_MAKE_MEM_UNDEFINED(ptr, 1);
+	*ptr = 0x7E;
+	VALGRIND_MAKE_MEM_NOACCESS(ptr, 1);
+}
+
+static inline bool
+sentinel_ok(const void *base, Size offset)
+{
+	const char *ptr = (const char *) base + offset;
+	bool		ret;
+
+	VALGRIND_MAKE_MEM_DEFINED(ptr, 1);
+	ret = *ptr == 0x7E;
+	VALGRIND_MAKE_MEM_NOACCESS(ptr, 1);
+
+	return ret;
+}
+
+#endif	/* MEMORY_CONTEXT_CHECKING */
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+
+extern void randomize_mem(char *ptr, size_t size);
+
+#endif   /* RANDOMIZE_ALLOCATED_MEMORY */
+
+
 #endif   /* MEMDEBUG_H */
-- 
2.5.5

From ec3ffcd37b88a3b86d0a56eebc21c24a17e1b7c6 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 20 Feb 2017 20:16:57 +0100
Subject: [PATCH 2/3] slab allocator

---
 src/backend/replication/logical/reorderbuffer.c |  82 +--
 src/backend/utils/mmgr/Makefile                 |   2 +-
 src/backend/utils/mmgr/slab.c                   | 800 ++++++++++++++++++++++++
 src/include/nodes/memnodes.h                    |   2 +-
 src/include/nodes/nodes.h                       |   1 +
 src/include/replication/reorderbuffer.h         |  15 +-
 src/include/utils/memutils.h                    |   9 +
 7 files changed, 842 insertions(+), 69 deletions(-)
 create mode 100644 src/backend/utils/mmgr/slab.c

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 7dc97fa..85e52ea 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -156,10 +156,7 @@ static const Size max_changes_in_memory = 4096;
  * major bottleneck, especially when spilling to disk while decoding batch
  * workloads.
  */
-static const Size max_cached_changes = 4096 * 2;
 static const Size max_cached_tuplebufs = 4096 * 2;		/* ~8MB */
-static const Size max_cached_transactions = 512;
-
 
 /* ---------------------------------------
  * primary reorderbuffer support routines
@@ -241,6 +238,22 @@ ReorderBufferAllocate(void)
 
 	buffer->context = new_ctx;
 
+	buffer->change_context = SlabContextCreate(new_ctx,
+											   "Change",
+											   SLAB_DEFAULT_BLOCK_SIZE,
+											   sizeof(ReorderBufferChange));
+
+	buffer->txn_context = SlabContextCreate(new_ctx,
+											"TXN",
+											SLAB_DEFAULT_BLOCK_SIZE,
+											sizeof(ReorderBufferTXN));
+
+	buffer->tup_context = AllocSetContextCreate(new_ctx,
+									"TupleBuf",
+									ALLOCSET_DEFAULT_MINSIZE,
+									ALLOCSET_DEFAULT_INITSIZE,
+									ALLOCSET_DEFAULT_MAXSIZE);
+
 	hash_ctl.keysize = sizeof(TransactionId);
 	hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
 	hash_ctl.hcxt = buffer->context;
@@ -251,8 +264,6 @@ ReorderBufferAllocate(void)
 	buffer->by_txn_last_xid = InvalidTransactionId;
 	buffer->by_txn_last_txn = NULL;
 
-	buffer->nr_cached_transactions = 0;
-	buffer->nr_cached_changes = 0;
 	buffer->nr_cached_tuplebufs = 0;
 
 	buffer->outbuf = NULL;
@@ -261,8 +272,6 @@ ReorderBufferAllocate(void)
 	buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
 	dlist_init(&buffer->toplevel_by_lsn);
-	dlist_init(&buffer->cached_transactions);
-	dlist_init(&buffer->cached_changes);
 	slist_init(&buffer->cached_tuplebufs);
 
 	return buffer;
@@ -291,19 +300,8 @@ ReorderBufferGetTXN(ReorderBuffer *rb)
 {
 	ReorderBufferTXN *txn;
 
-	/* check the slab cache */
-	if (rb->nr_cached_transactions > 0)
-	{
-		rb->nr_cached_transactions--;
-		txn = (ReorderBufferTXN *)
-			dlist_container(ReorderBufferTXN, node,
-							dlist_pop_head_node(&rb->cached_transactions));
-	}
-	else
-	{
-		txn = (ReorderBufferTXN *)
-			MemoryContextAlloc(rb->context, sizeof(ReorderBufferTXN));
-	}
+	txn = (ReorderBufferTXN *)
+		MemoryContextAlloc(rb->txn_context, sizeof(ReorderBufferTXN));
 
 	memset(txn, 0, sizeof(ReorderBufferTXN));
 
@@ -344,18 +342,7 @@ ReorderBufferReturnTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
 		txn->invalidations = NULL;
 	}
 
-	/* check whether to put into the slab cache */
-	if (rb->nr_cached_transactions < max_cached_transactions)
-	{
-		rb->nr_cached_transactions++;
-		dlist_push_head(&rb->cached_transactions, &txn->node);
-		VALGRIND_MAKE_MEM_UNDEFINED(txn, sizeof(ReorderBufferTXN));
-		VALGRIND_MAKE_MEM_DEFINED(&txn->node, sizeof(txn->node));
-	}
-	else
-	{
-		pfree(txn);
-	}
+	pfree(txn);
 }
 
 /*
@@ -366,19 +353,8 @@ ReorderBufferGetChange(ReorderBuffer *rb)
 {
 	ReorderBufferChange *change;
 
-	/* check the slab cache */
-	if (rb->nr_cached_changes)
-	{
-		rb->nr_cached_changes--;
-		change = (ReorderBufferChange *)
-			dlist_container(ReorderBufferChange, node,
-							dlist_pop_head_node(&rb->cached_changes));
-	}
-	else
-	{
-		change = (ReorderBufferChange *)
-			MemoryContextAlloc(rb->context, sizeof(ReorderBufferChange));
-	}
+	change = (ReorderBufferChange *)
+		MemoryContextAlloc(rb->change_context, sizeof(ReorderBufferChange));
 
 	memset(change, 0, sizeof(ReorderBufferChange));
 	return change;
@@ -434,21 +410,9 @@ ReorderBufferReturnChange(ReorderBuffer *rb, ReorderBufferChange *change)
 			break;
 	}
 
-	/* check whether to put into the slab cache */
-	if (rb->nr_cached_changes < max_cached_changes)
-	{
-		rb->nr_cached_changes++;
-		dlist_push_head(&rb->cached_changes, &change->node);
-		VALGRIND_MAKE_MEM_UNDEFINED(change, sizeof(ReorderBufferChange));
-		VALGRIND_MAKE_MEM_DEFINED(&change->node, sizeof(change->node));
-	}
-	else
-	{
-		pfree(change);
-	}
+	pfree(change);
 }
 
-
 /*
  * Get an unused, possibly preallocated, ReorderBufferTupleBuf fitting at
  * least a tuple of size tuple_len (excluding header overhead).
@@ -491,7 +455,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 	else
 	{
 		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->context,
+			MemoryContextAlloc(rb->tup_context,
 							   sizeof(ReorderBufferTupleBuf) +
 							   MAXIMUM_ALIGNOF + alloc_len);
 		tuple->alloc_tuple_size = alloc_len;
diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index fc5f793..cd0e803 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o
+OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/slab.c b/src/backend/utils/mmgr/slab.c
new file mode 100644
index 0000000..f30c239
--- /dev/null
+++ b/src/backend/utils/mmgr/slab.c
@@ -0,0 +1,800 @@
+/*-------------------------------------------------------------------------
+ *
+ * slab.c
+ *	  SLAB allocator definitions.
+ *
+ * SLAB is a custom MemoryContext implementation designed for cases where
+ * all allocated objects have the same size.
+ *
+ *
+ * Portions Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/slab.c
+ *
+ *
+ *	The constant allocation size allows significant simplification and various
+ *	optimizations. The blocks are carved into chunks of exactly the right size
+ *	(plus alignment), not wasting any memory.
+ *
+ *	The information about free chunks is maintained both at the block level and
+ *	global (context) level. This is possible as the chunk size (and thus also
+ *	the number of chunks per block) is fixed.
+ *
+ *	On each block, free chunks are tracked in a simple singly-linked list:
+ *	the content of each free chunk is replaced with the index of the next
+ *	free chunk on the same block. Each block also keeps a counter of free
+ *	chunks, which combined with this per-block freelist makes it trivial to
+ *	detect when the whole block can be freed.
+ *
+ *	At the context level, we use 'freelist' to track blocks ordered by number
+ *	of free chunks, starting with blocks having a single allocated chunk, and
+ *	with completely full blocks on the tail.
+ *
+ *	This also allows various optimizations - for example, when searching for
+ *	a free chunk, the allocator reuses space from the fullest blocks first,
+ *	in the hope that some of the less full blocks will become completely
+ *	empty (and can be returned to the OS).
+ *
+ *	For each block, we maintain the index of the first free chunk
+ *	(firstFreeChunk) - this is quite cheap and allows us to skip all the
+ *	preceding used chunks, eliminating a significant number of lookups in
+ *	many common usage patterns. In the worst case this performs as if the
+ *	index was not maintained.
+ *
+ *	At the context level, we also cache the freelist index of the blocks
+ *	with the fewest free chunks (minFreeChunks), so that we don't have to
+ *	search the whole freelist on every SlabAlloc() call, which would be
+ *	quite expensive.
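+ *
+ *	For example, with chunksPerBlock = N, a block that currently has k free
+ *	chunks is always linked in freelist[k], so freelist[0] holds completely
+ *	full blocks and freelist[N] completely empty ones, and minFreeChunks
+ *	caches the lowest non-zero index with a non-empty list (or 0 when no
+ *	block has a free chunk).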
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define SLAB_BLOCKHDRSZ MAXALIGN(sizeof(SlabBlockData))
+#define SLAB_CHUNKHDRSZ MAXALIGN(sizeof(SlabChunkData))
+
+/* Portion of SLAB_CHUNKHDRSZ examined outside slab.c. */
+#define SLAB_CHUNK_PUBLIC	\
+	(offsetof(SlabChunkData, size) + sizeof(Size))
+
+/* Portion of SLAB_CHUNKHDRSZ excluding trailing padding. */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define SLAB_CHUNK_USED \
+	(offsetof(SlabChunkData, requested_size) + sizeof(Size))
+#else
+#define SLAB_CHUNK_USED \
+	(offsetof(SlabChunkData, size) + sizeof(Size))
+#endif
+
+typedef struct SlabBlockData *SlabBlock;		/* forward reference */
+typedef struct SlabChunkData *SlabChunk;
+
+/*
+ * SlabPointer
+ *		Aligned pointer which may be a member of a slab context.
+ */
+typedef void *SlabPointer;
+
+/*
+ * SlabContext is a specialized implementation of MemoryContext.
+ */
+typedef struct SlabContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+	/* Allocation parameters for this context: */
+	Size		chunkSize;		/* chunk size */
+	Size		fullChunkSize;	/* chunk size including header and alignment */
+	Size		blockSize;		/* block size */
+	int			chunksPerBlock; /* number of chunks per block */
+	int			minFreeChunks;	/* min number of free chunks in any block */
+	int			nblocks;		/* number of blocks allocated */
+	/* blocks with free space, grouped by number of free chunks: */
+	dlist_head	freelist[FLEXIBLE_ARRAY_MEMBER];
+}	SlabContext;
+
+typedef SlabContext *Slab;
+
+/*
+ * SlabBlockData
+ *		Structure of a single block in SLAB allocator.
+ *
+ * node: doubly-linked list of blocks in global freelist
+ * nfree: number of free chunks in this block
+ * firstFreeChunk: index of the first free chunk
+ */
+typedef struct SlabBlockData
+{
+	dlist_node	node;			/* doubly-linked list */
+	int			nfree;			/* number of free chunks */
+	int			firstFreeChunk; /* index of the first free chunk in the block */
+}	SlabBlockData;
+
+/*
+ * SlabChunk
+ *		The prefix of each piece of memory in a SlabBlock
+ */
+typedef struct SlabChunkData
+{
+	/* block owning this chunk */
+	void	   *block;
+
+	/* include StandardChunkHeader because mcxt.c expects that */
+	StandardChunkHeader header;
+
+}	SlabChunkData;
+
+
+/*
+ * SlabIsValid
+ *		True iff set is a valid slab context.
+ */
+#define SlabIsValid(set) PointerIsValid(set)
+
+#define SlabPointerGetChunk(ptr)	\
+					((SlabChunk)(((char *)(ptr)) - SLAB_CHUNKHDRSZ))
+#define SlabChunkGetPointer(chk)	\
+					((SlabPointer)(((char *)(chk)) + SLAB_CHUNKHDRSZ))
+#define SlabBlockGetChunk(set, block, idx)	\
+					((SlabChunk) ((char *) (block) + sizeof(SlabBlockData)	\
+					+ (idx * set->fullChunkSize)))
+#define SlabBlockStart(block)	\
+				((char *) block + sizeof(SlabBlockData))
+#define SlabChunkIndex(set, block, chunk)	\
+				(((char *) chunk - SlabBlockStart(block)) / set->fullChunkSize)
+/*
+ * These functions implement the MemoryContext API for Slab contexts.
+ */
+static void *SlabAlloc(MemoryContext context, Size size);
+static void SlabFree(MemoryContext context, void *pointer);
+static void *SlabRealloc(MemoryContext context, void *pointer, Size size);
+static void SlabInit(MemoryContext context);
+static void SlabReset(MemoryContext context);
+static void SlabDelete(MemoryContext context);
+static Size SlabGetChunkSpace(MemoryContext context, void *pointer);
+static bool SlabIsEmpty(MemoryContext context);
+static void SlabStats(MemoryContext context, int level, bool print,
+		  MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void SlabCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Slab contexts.
+ */
+static MemoryContextMethods SlabMethods = {
+	SlabAlloc,
+	SlabFree,
+	SlabRealloc,
+	SlabInit,
+	SlabReset,
+	SlabDelete,
+	SlabGetChunkSpace,
+	SlabIsEmpty,
+	SlabStats
+#ifdef MEMORY_CONTEXT_CHECKING
+	,SlabCheck
+#endif
+};
+
+/* ----------
+ * Debug macros
+ * ----------
+ */
+#ifdef HAVE_ALLOCINFO
+#define SlabFreeInfo(_cxt, _chunk) \
+			fprintf(stderr, "SlabFree: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#define SlabAllocInfo(_cxt, _chunk) \
+			fprintf(stderr, "SlabAlloc: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#else
+#define SlabFreeInfo(_cxt, _chunk)
+#define SlabAllocInfo(_cxt, _chunk)
+#endif
+
+
+/*
+ * SlabContextCreate
+ *		Create a new Slab context.
+ *
+ * parent: parent context, or NULL if top-level context
+ * name: name of context (for debugging --- string will be copied)
+ * blockSize: allocation block size
+ * chunkSize: allocation chunk size
+ *
+ * The chunkSize may not exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - SLAB_BLOCKHDRSZ - SLAB_CHUNKHDRSZ
+ *
+ */
+MemoryContext
+SlabContextCreate(MemoryContext parent,
+				  const char *name,
+				  Size blockSize,
+				  Size chunkSize)
+{
+	int			chunksPerBlock;
+	Size		fullChunkSize;
+	Size		freelistSize;
+	Slab		set;
+
+	/* chunk, including SLAB header (both addresses nicely aligned) */
+	fullChunkSize = MAXALIGN(sizeof(SlabChunkData) + MAXALIGN(chunkSize));
+
+	/* Make sure the block can store at least one chunk. */
+	if (blockSize - sizeof(SlabBlockData) < fullChunkSize)
+		elog(ERROR, "block size %zu for slab is too small for %zu chunks",
+			 blockSize, chunkSize);
+
+	/* Compute maximum number of chunks per block */
+	chunksPerBlock = (blockSize - sizeof(SlabBlockData)) / fullChunkSize;
+
+	/* The freelist starts with 0, ends with chunksPerBlock. */
+	freelistSize = sizeof(dlist_head) * (chunksPerBlock + 1);
+
+	/* if we can't fit at least one chunk into the block, we're hosed */
+	Assert(chunksPerBlock > 0);
+
+	/* make sure the chunks actually fit on the block	*/
+	Assert((fullChunkSize * chunksPerBlock) + sizeof(SlabBlockData) <= blockSize);
+
+	/* Do the type-independent part of context creation */
+	set = (Slab) MemoryContextCreate(T_SlabContext,
+									 (offsetof(SlabContext, freelist) + freelistSize),
+									 &SlabMethods,
+									 parent,
+									 name);
+
+	set->blockSize = blockSize;
+	set->chunkSize = chunkSize;
+	set->fullChunkSize = fullChunkSize;
+	set->chunksPerBlock = chunksPerBlock;
+	set->nblocks = 0;
+	set->minFreeChunks = 0;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * SlabInit
+ *		Context-type-specific initialization routine.
+ */
+static void
+SlabInit(MemoryContext context)
+{
+	int			i;
+	Slab		set = (Slab) context;
+
+	/* initialize the freelist slots */
+	for (i = 0; i < (set->chunksPerBlock + 1); i++)
+		dlist_init(&set->freelist[i]);
+}
+
+/*
+ * SlabReset
+ *		Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+SlabReset(MemoryContext context)
+{
+	int			i;
+	Slab		set = (Slab) context;
+
+	AssertArg(SlabIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Check for corruption and leaks before freeing */
+	SlabCheck(context);
+#endif
+
+	/* walk over freelists and free the blocks */
+	for (i = 0; i <= set->chunksPerBlock; i++)
+	{
+		dlist_mutable_iter miter;
+
+		dlist_foreach_modify(miter, &set->freelist[i])
+		{
+			SlabBlock	block = dlist_container(SlabBlockData, node, miter.cur);
+
+			dlist_delete(miter.cur);
+
+#ifdef CLOBBER_FREED_MEMORY
+			wipe_mem(block, set->blockSize);
+#endif
+			free(block);
+			set->nblocks--;
+		}
+	}
+
+	set->minFreeChunks = 0;
+
+	Assert(set->nblocks == 0);
+}
+
+/*
+ * SlabDelete
+ *		Frees all memory which is allocated in the given set, in preparation
+ *		for deletion of the set. We simply call SlabReset().
+ */
+static void
+SlabDelete(MemoryContext context)
+{
+	/* just reset the context */
+	SlabReset(context);
+}
+
+/*
+ * SlabAlloc
+ *		Returns pointer to allocated memory of given size or NULL if
+ *		request could not be completed; memory is added to the set.
+ */
+static void *
+SlabAlloc(MemoryContext context, Size size)
+{
+	Slab		set = (Slab) context;
+	SlabBlock	block;
+	SlabChunk	chunk;
+	int			idx;
+
+	AssertArg(SlabIsValid(set));
+
+	Assert((set->minFreeChunks >= 0) && (set->minFreeChunks < set->chunksPerBlock));
+
+	/* make sure we only allow correct request size */
+	if (size != set->chunkSize)
+		elog(ERROR, "unexpected alloc chunk size %zu (expected %zu)",
+			 size, set->chunkSize);
+
+	/*
+	 * If there are no free chunks in any existing block, create a new block
+	 * and put it to the last freelist bucket.
+	 *
+	 * (set->minFreeChunks == 0) means there are no blocks with free chunks,
+	 * thanks to how minFreeChunks is updated at the end of SlabAlloc().
+	 */
+	if (set->minFreeChunks == 0)
+	{
+		block = (SlabBlock) malloc(set->blockSize);
+
+		if (block == NULL)
+			return NULL;
+
+		block->nfree = set->chunksPerBlock;
+		block->firstFreeChunk = 0;
+
+		/*
+		 * Put all the chunks on a freelist. Walk the chunks and point each
+		 * one to the next one.
+		 */
+		for (idx = 0; idx < set->chunksPerBlock; idx++)
+		{
+			chunk = SlabBlockGetChunk(set, block, idx);
+			*(int32 *)SlabChunkGetPointer(chunk) = (idx + 1);
+		}
+
+		/*
+		 * And add it to the last freelist with all chunks empty.
+		 *
+		 * XXX We know there are no blocks in the freelist, otherwise we
+		 * wouldn't need a new block.
+		 */
+		Assert(dlist_is_empty(&set->freelist[set->chunksPerBlock]));
+
+		dlist_push_head(&set->freelist[set->chunksPerBlock], &block->node);
+
+		set->minFreeChunks = set->chunksPerBlock;
+		set->nblocks += 1;
+	}
+
+	/* grab the block from the freelist (a just-created block is there too) */
+	block = dlist_head_element(SlabBlockData, node,
+							   &set->freelist[set->minFreeChunks]);
+
+	/* make sure we actually got a valid block, with matching nfree */
+	Assert(block != NULL);
+	Assert(set->minFreeChunks == block->nfree);
+	Assert(block->nfree > 0);
+
+	/* we know index of the first free chunk in the block */
+	idx = block->firstFreeChunk;
+
+	/* make sure the chunk index is valid */
+	Assert((idx >= 0) && (idx < set->chunksPerBlock));
+
+	/* compute the chunk location from the block start (after the block header) */
+	chunk = SlabBlockGetChunk(set, block, idx);
+
+	/*
+	 * Update the block nfree count, and also the minFreeChunks as we've
+	 * decreased nfree for a block with the minimum number of free chunks
+	 * (because that's how we chose the block).
+	 */
+	block->nfree--;
+	set->minFreeChunks = block->nfree;
+
+	/*
+	 * Remove the chunk from the freelist head. The index of the next free
+	 * chunk is stored in the chunk itself.
+	 */
+	block->firstFreeChunk = *(int32 *)SlabChunkGetPointer(chunk);
+
+	Assert(block->firstFreeChunk >= 0);
+	Assert(block->firstFreeChunk <= set->chunksPerBlock);
+
+	Assert(((block->nfree != 0) && (block->firstFreeChunk < set->chunksPerBlock)) ||
+		   ((block->nfree == 0) && (block->firstFreeChunk == set->chunksPerBlock)));
+
+	/* move the whole block to the right place in the freelist */
+	dlist_delete(&block->node);
+	dlist_push_head(&set->freelist[block->nfree], &block->node);
+
+	/*
+	 * And finally update minFreeChunks, i.e. the index to the block with the
+	 * lowest number of free chunks. We only need to do that when the block
+	 * got full (otherwise we know the current block is the right one). We'll
+	 * simply walk the freelist until we find a non-empty entry.
+	 */
+	if (set->minFreeChunks == 0)
+	{
+		for (idx = 1; idx <= set->chunksPerBlock; idx++)
+		{
+			if (dlist_is_empty(&set->freelist[idx]))
+				continue;
+
+			/* found a non-empty freelist */
+			set->minFreeChunks = idx;
+			break;
+		}
+	}
+
+	if (set->minFreeChunks == set->chunksPerBlock)
+		set->minFreeChunks = 0;
+
+	/* Prepare to initialize the chunk header. */
+	VALGRIND_MAKE_MEM_UNDEFINED(chunk, SLAB_CHUNK_USED);
+
+	chunk->block = (void *) block;
+
+	chunk->header.context = (MemoryContext) set;
+	chunk->header.size = MAXALIGN(size);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	chunk->header.requested_size = size;
+	VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+							   sizeof(chunk->header.requested_size));
+	/* set mark to catch clobber of "unused" space */
+	if (size < chunk->header.size)
+		set_sentinel(SlabChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) SlabChunkGetPointer(chunk), size);
+#endif
+
+	SlabAllocInfo(set, chunk);
+	return SlabChunkGetPointer(chunk);
+}
+
+/*
+ * SlabFree
+ *		Frees allocated memory; memory is removed from the set.
+ */
+static void
+SlabFree(MemoryContext context, void *pointer)
+{
+	int			idx;
+	Slab		set = (Slab) context;
+	SlabChunk	chunk = SlabPointerGetChunk(pointer);
+	SlabBlock	block = chunk->block;
+
+	SlabFreeInfo(set, chunk);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+							  sizeof(chunk->header.requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->header.requested_size < chunk->header.size)
+		if (!sentinel_ok(pointer, chunk->header.requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 set->header.name, chunk);
+#endif
+
+	/* compute index of the chunk with respect to block start */
+	idx = SlabChunkIndex(set, block, chunk);
+
+	/* push the chunk on the block's freelist, and update the nfree count */
+	*(int32 *)pointer = block->firstFreeChunk;
+	block->firstFreeChunk = idx;
+	block->nfree++;
+
+	Assert(block->nfree > 0);
+	Assert(block->nfree <= set->chunksPerBlock);
+
+#ifdef CLOBBER_FREED_MEMORY
+	/* XXX don't wipe the int32 index, used for block-level freelist */
+	wipe_mem((char *)pointer + sizeof(int32), chunk->header.size - sizeof(int32));
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Reset requested_size to 0 in chunks that are on freelist */
+	chunk->header.requested_size = 0;
+#endif
+
+	/* remove the block from a freelist */
+	dlist_delete(&block->node);
+
+	/*
+	 * See if we need to update the minFreeChunks field for the set - we only
+	 * need to do that if the block previously had exactly minFreeChunks free
+	 * chunks (i.e. before this pfree). In that case we check whether the
+	 * original freelist still contains any blocks, and either keep the
+	 * current value (if it does) or increment it by one (this block is still
+	 * the one with the minimum number of free chunks).
+	 *
+	 * The one exception is when the block becomes entirely free - in that
+	 * case we will free it, so we can't use it for minFreeChunks. That
+	 * however means there are no more blocks with free chunks.
+	 */
+	if (set->minFreeChunks == (block->nfree - 1))
+	{
+		/* Have we removed the last chunk from the freelist? */
+		if (dlist_is_empty(&set->freelist[set->minFreeChunks]))
+		{
+			/* but if we made the block entirely free, we'll free it */
+			if (block->nfree == set->chunksPerBlock)
+				set->minFreeChunks = 0;
+			else
+				set->minFreeChunks++;
+		}
+	}
+
+	/* If the block is now completely empty, free it. */
+	if (block->nfree == set->chunksPerBlock)
+	{
+		free(block);
+		set->nblocks--;
+	}
+	else
+		dlist_push_head(&set->freelist[block->nfree], &block->node);
+
+	Assert(set->nblocks >= 0);
+}
+
+/*
+ * SlabRealloc
+ *		As Slab is designed for allocating equally-sized chunks of memory, it
+ *		can't really do an actual realloc.
+ *
+ * We try to be gentle and allow calls with exactly the same size, as in that
+ * case we can simply return the same chunk. When the size differs, we throw
+ * an error.
+ *
+ * We might even support cases with (size < chunkSize). That however seems
+ * rather pointless - Slab is meant for chunks of constant size, and moreover
+ * realloc is usually used to enlarge the chunk.
+ *
+ * XXX Perhaps we should not be gentle at all and simply fail in all cases,
+ * to eliminate the (mostly pointless) uncertainty.
+ */
+static void *
+SlabRealloc(MemoryContext context, void *pointer, Size size)
+{
+	Slab		set = (Slab) context;
+
+	/* can't do actual realloc with slab, but let's try to be gentle */
+	if (size == set->chunkSize)
+		return pointer;
+
+	elog(ERROR, "slab allocator does not support realloc()");
+}
+
+/*
+ * SlabGetChunkSpace
+ *		Given a currently-allocated chunk, determine the total space
+ *		it occupies (including all memory-allocation overhead).
+ */
+static Size
+SlabGetChunkSpace(MemoryContext context, void *pointer)
+{
+	SlabChunk	chunk = SlabPointerGetChunk(pointer);
+
+	return chunk->header.size + SLAB_CHUNKHDRSZ;
+}
+
+/*
+ * SlabIsEmpty
+ *		Is a Slab empty of any allocated space?
+ */
+static bool
+SlabIsEmpty(MemoryContext context)
+{
+	Slab		set = (Slab) context;
+
+	return (set->nblocks == 0);
+}
+
+/*
+ * SlabStats
+ *		Compute stats about memory consumption of a Slab.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Slab into *totals.
+ */
+static void
+SlabStats(MemoryContext context, int level, bool print,
+		  MemoryContextCounters *totals)
+{
+	Slab		set = (Slab) context;
+	Size		nblocks = 0;
+	Size		freechunks = 0;
+	Size		totalspace = 0;
+	Size		freespace = 0;
+	int			i;
+
+	for (i = 0; i <= set->chunksPerBlock; i++)
+	{
+		dlist_iter	iter;
+
+		dlist_foreach(iter, &set->freelist[i])
+		{
+			SlabBlock	block = dlist_container(SlabBlockData, node, iter.cur);
+
+			nblocks++;
+			totalspace += set->blockSize;
+			freespace += set->fullChunkSize * block->nfree;
+			freechunks += block->nfree;
+		}
+	}
+
+	if (print)
+	{
+		for (i = 0; i < level; i++)
+			fprintf(stderr, "  ");
+		fprintf(stderr,
+			"Slab: %s: %zu total in %zd blocks; %zu free (%zd chunks); %zu used\n",
+				set->header.name, totalspace, nblocks, freespace, freechunks,
+				totalspace - freespace);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += nblocks;
+		totals->freechunks += freechunks;
+		totals->totalspace += totalspace;
+		totals->freespace += freespace;
+	}
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * SlabCheck
+ *		Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+SlabCheck(MemoryContext context)
+{
+	int			i;
+	Slab		slab = (Slab) context;
+	char	   *name = slab->header.name;
+	char	   *freechunks;
+
+	Assert(slab->chunksPerBlock > 0);
+
+	/* array of flags tracking free chunks on a block */
+	freechunks = palloc(slab->chunksPerBlock * sizeof(bool));
+
+	/* walk all the freelists */
+	for (i = 0; i <= slab->chunksPerBlock; i++)
+	{
+		int			j,
+					nfree;
+		dlist_iter	iter;
+
+		/* walk all blocks on this freelist */
+		dlist_foreach(iter, &slab->freelist[i])
+		{
+			int			idx;
+			SlabBlock	block = dlist_container(SlabBlockData, node, iter.cur);
+
+			/*
+			 * Make sure the number of free chunks (in the block header) matches
+			 * the block's position in the freelist.
+			 */
+			if (block->nfree != i)
+				elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist %d",
+					 name, block->nfree, block, i);
+
+			/* reset the array of free-chunk flags for this block */
+			memset(freechunks, 0, (slab->chunksPerBlock * sizeof(bool)));
+			idx = block->firstFreeChunk;
+
+			/*
+			 * Now walk the block's freelist to count the free chunks, and then
+			 * perform additional checks on the used ones. As the freelist is
+			 * stored within the chunks themselves, we follow it first to
+			 * reconstruct which chunks are free.
+			 */
+
+			nfree = 0;
+			while (idx < slab->chunksPerBlock)
+			{
+				SlabChunk	chunk;
+
+				/* count the chunk as free, add it to the bitmap */
+				nfree++;
+				freechunks[idx] = true;
+
+				/* read index of the next free chunk */
+				chunk = SlabBlockGetChunk(slab, block, idx);
+				idx = *(int32 *)SlabChunkGetPointer(chunk);
+			}
+
+			for (j = 0; j < slab->chunksPerBlock; j++)
+			{
+				/* chunks not tracked as free must be used, so check them */
+				if (! freechunks[j])
+				{
+					SlabChunk	chunk = SlabBlockGetChunk(slab, block, j);
+
+					/*
+					 * requested_size is kept NOACCESS for allocated chunks, so
+					 * make it readable before checking it below.
+					 */
+					VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+										   sizeof(chunk->header.requested_size));
+
+					/* chunks have both block and slab pointers, so check both */
+					if (chunk->block != block)
+						elog(WARNING, "problem in slab %s: bogus block link in block %p, chunk %p",
+							 name, block, chunk);
+
+					if (chunk->header.context != (MemoryContext) slab)
+						elog(WARNING, "problem in slab %s: bogus slab link in block %p, chunk %p",
+							 name, block, chunk);
+
+					/* now make sure the chunk size is correct */
+					if (chunk->header.size != MAXALIGN(slab->chunkSize))
+						elog(WARNING, "problem in slab %s: bogus chunk size in block %p, chunk %p",
+							 name, block, chunk);
+
+					/* now make sure the requested size is correct */
+					if (chunk->header.requested_size != slab->chunkSize)
+						elog(WARNING, "problem in slab %s: bogus chunk requested size in block %p, chunk %p",
+							 name, block, chunk);
+
+					/* there might be a sentinel (thanks to alignment) */
+					if (chunk->header.requested_size < chunk->header.size &&
+						!sentinel_ok(chunk, SLAB_CHUNKHDRSZ + chunk->header.requested_size))
+						elog(WARNING, "problem in slab %s: detected write past chunk end in block %p, chunk %p",
+							 name, block, chunk);
+
+					/* done with requested_size, make it NOACCESS again */
+					VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+										   sizeof(chunk->header.requested_size));
+				}
+			}
+
+			/*
+			 * Make sure we got the expected number of free chunks (as tracked in
+			 * the block header).
+			 */
+			if (nfree != block->nfree)
+				elog(WARNING, "problem in slab %s: number of free chunks %d in block %p does not match freelist count %d",
+					 name, block->nfree, block, nfree);
+		}
+	}
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index e487d17..fe6bc90 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -96,6 +96,6 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
 	((context) != NULL && \
-	 (IsA((context), AllocSetContext)))
+	 (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
 
 #endif   /* MEMNODES_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 95dd8ba..28aca92 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -278,6 +278,7 @@ typedef enum NodeTag
 	 */
 	T_MemoryContext,
 	T_AllocSetContext,
+	T_SlabContext,
 
 	/*
 	 * TAGS FOR VALUE NODES (value.h)
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index 25b0fc8..c931e83 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -331,6 +331,13 @@ struct ReorderBuffer
 	MemoryContext context;
 
 	/*
+	 * Memory contexts for each type of object (TXNs, changes and tuple buffers)
+	 */
+	MemoryContext change_context;
+	MemoryContext txn_context;
+	MemoryContext tup_context;
+
+	/*
 	 * Data structure slab cache.
 	 *
 	 * We allocate/deallocate some structures very frequently, to avoid bigger
@@ -340,14 +347,6 @@ struct ReorderBuffer
 	 * on top of reorderbuffer.c
 	 */
 
-	/* cached ReorderBufferTXNs */
-	dlist_head	cached_transactions;
-	Size		nr_cached_transactions;
-
-	/* cached ReorderBufferChanges */
-	dlist_head	cached_changes;
-	Size		nr_cached_changes;
-
 	/* cached ReorderBufferTupleBufs */
 	slist_head	cached_tuplebufs;
 	Size		nr_cached_tuplebufs;
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 1d1035e..5223a4d 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -135,6 +135,12 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent,
 					  Size initBlockSize,
 					  Size maxBlockSize);
 
+/* slab.c */
+extern MemoryContext SlabContextCreate(MemoryContext parent,
+				  const char *name,
+				  Size blockSize,
+				  Size chunkSize);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
@@ -171,4 +177,7 @@ extern MemoryContext AllocSetContextCreate(MemoryContext parent,
  */
 #define ALLOCSET_SEPARATE_THRESHOLD  8192
 
+#define SLAB_DEFAULT_BLOCK_SIZE		(8 * 1024)
+#define SLAB_LARGE_BLOCK_SIZE		(8 * 1024 * 1024)
+
 #endif   /* MEMUTILS_H */
-- 
2.5.5

From f52f71265d91a96154b74ae00e6f99df2f6f3e48 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 20 Feb 2017 20:17:37 +0100
Subject: [PATCH 3/3] generational context

---
 src/backend/replication/logical/reorderbuffer.c |  78 +--
 src/backend/utils/mmgr/Makefile                 |   2 +-
 src/backend/utils/mmgr/gen.c                    | 758 ++++++++++++++++++++++++
 src/backend/utils/mmgr/generation.c             | 758 ++++++++++++++++++++++++
 src/include/nodes/memnodes.h                    |   4 +-
 src/include/nodes/nodes.h                       |   1 +
 src/include/replication/reorderbuffer.h         |  14 -
 src/include/utils/memutils.h                    |   5 +
 8 files changed, 1536 insertions(+), 84 deletions(-)
 create mode 100644 src/backend/utils/mmgr/gen.c
 create mode 100644 src/backend/utils/mmgr/generation.c

diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 85e52ea..0bdc214 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -149,15 +149,6 @@ typedef struct ReorderBufferDiskChange
  */
 static const Size max_changes_in_memory = 4096;
 
-/*
- * We use a very simple form of a slab allocator for frequently allocated
- * objects, simply keeping a fixed number in a linked list when unused,
- * instead pfree()ing them. Without that in many workloads aset.c becomes a
- * major bottleneck, especially when spilling to disk while decoding batch
- * workloads.
- */
-static const Size max_cached_tuplebufs = 4096 * 2;		/* ~8MB */
-
 /* ---------------------------------------
  * primary reorderbuffer support routines
  * ---------------------------------------
@@ -248,11 +239,9 @@ ReorderBufferAllocate(void)
 											SLAB_DEFAULT_BLOCK_SIZE,
 											sizeof(ReorderBufferTXN));
 
-	buffer->tup_context = AllocSetContextCreate(new_ctx,
-									"TupleBuf",
-									ALLOCSET_DEFAULT_MINSIZE,
-									ALLOCSET_DEFAULT_INITSIZE,
-									ALLOCSET_DEFAULT_MAXSIZE);
+	buffer->tup_context = GenerationContextCreate(new_ctx,
+										   "Tuples",
+										   SLAB_LARGE_BLOCK_SIZE);
 
 	hash_ctl.keysize = sizeof(TransactionId);
 	hash_ctl.entrysize = sizeof(ReorderBufferTXNByIdEnt);
@@ -264,15 +253,12 @@ ReorderBufferAllocate(void)
 	buffer->by_txn_last_xid = InvalidTransactionId;
 	buffer->by_txn_last_txn = NULL;
 
-	buffer->nr_cached_tuplebufs = 0;
-
 	buffer->outbuf = NULL;
 	buffer->outbufsize = 0;
 
 	buffer->current_restart_decoding_lsn = InvalidXLogRecPtr;
 
 	dlist_init(&buffer->toplevel_by_lsn);
-	slist_init(&buffer->cached_tuplebufs);
 
 	return buffer;
 }
@@ -425,42 +411,12 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 
 	alloc_len = tuple_len + SizeofHeapTupleHeader;
 
-	/*
-	 * Most tuples are below MaxHeapTupleSize, so we use a slab allocator for
-	 * those. Thus always allocate at least MaxHeapTupleSize. Note that tuples
-	 * generated for oldtuples can be bigger, as they don't have out-of-line
-	 * toast columns.
-	 */
-	if (alloc_len < MaxHeapTupleSize)
-		alloc_len = MaxHeapTupleSize;
-
-
-	/* if small enough, check the slab cache */
-	if (alloc_len <= MaxHeapTupleSize && rb->nr_cached_tuplebufs)
-	{
-		rb->nr_cached_tuplebufs--;
-		tuple = slist_container(ReorderBufferTupleBuf, node,
-								slist_pop_head_node(&rb->cached_tuplebufs));
-		Assert(tuple->alloc_tuple_size == MaxHeapTupleSize);
-#ifdef USE_ASSERT_CHECKING
-		memset(&tuple->tuple, 0xa9, sizeof(HeapTupleData));
-		VALGRIND_MAKE_MEM_UNDEFINED(&tuple->tuple, sizeof(HeapTupleData));
-#endif
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-#ifdef USE_ASSERT_CHECKING
-		memset(tuple->tuple.t_data, 0xa8, tuple->alloc_tuple_size);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-#endif
-	}
-	else
-	{
-		tuple = (ReorderBufferTupleBuf *)
-			MemoryContextAlloc(rb->tup_context,
-							   sizeof(ReorderBufferTupleBuf) +
-							   MAXIMUM_ALIGNOF + alloc_len);
-		tuple->alloc_tuple_size = alloc_len;
-		tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
-	}
+	tuple = (ReorderBufferTupleBuf *)
+		MemoryContextAlloc(rb->tup_context,
+						   sizeof(ReorderBufferTupleBuf) +
+						   MAXIMUM_ALIGNOF + alloc_len);
+	tuple->alloc_tuple_size = alloc_len;
+	tuple->tuple.t_data = ReorderBufferTupleBufData(tuple);
 
 	return tuple;
 }
@@ -474,21 +430,7 @@ ReorderBufferGetTupleBuf(ReorderBuffer *rb, Size tuple_len)
 void
 ReorderBufferReturnTupleBuf(ReorderBuffer *rb, ReorderBufferTupleBuf *tuple)
 {
-	/* check whether to put into the slab cache, oversized tuples never are */
-	if (tuple->alloc_tuple_size == MaxHeapTupleSize &&
-		rb->nr_cached_tuplebufs < max_cached_tuplebufs)
-	{
-		rb->nr_cached_tuplebufs++;
-		slist_push_head(&rb->cached_tuplebufs, &tuple->node);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple->tuple.t_data, tuple->alloc_tuple_size);
-		VALGRIND_MAKE_MEM_UNDEFINED(tuple, sizeof(ReorderBufferTupleBuf));
-		VALGRIND_MAKE_MEM_DEFINED(&tuple->node, sizeof(tuple->node));
-		VALGRIND_MAKE_MEM_DEFINED(&tuple->alloc_tuple_size, sizeof(tuple->alloc_tuple_size));
-	}
-	else
-	{
-		pfree(tuple);
-	}
+	pfree(tuple);
 }
 
 /*
diff --git a/src/backend/utils/mmgr/Makefile b/src/backend/utils/mmgr/Makefile
index cd0e803..7263399 100644
--- a/src/backend/utils/mmgr/Makefile
+++ b/src/backend/utils/mmgr/Makefile
@@ -12,6 +12,6 @@ subdir = src/backend/utils/mmgr
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = aset.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
+OBJS = aset.o generation.o dsa.o freepage.o mcxt.o memdebug.o portalmem.o slab.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/mmgr/gen.c b/src/backend/utils/mmgr/gen.c
new file mode 100644
index 0000000..669a512
--- /dev/null
+++ b/src/backend/utils/mmgr/gen.c
@@ -0,0 +1,758 @@
+/*-------------------------------------------------------------------------
+ *
+ * generation.c
+ *	  Generational allocator definitions.
+ *
+ * Generation is a custom MemoryContext implementation designed for cases
+ * where chunks have similar lifespan.
+ *
+ * Portions Copyright (c) 2016, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/generation.c
+ *
+ *
+ *	This memory context is based on the assumption that the allocated chunks
+ *	have similar lifespan, i.e. that chunks allocated close to each other
+ *	(in time) will also be freed in close proximity, and mostly in the same
+ *	order. This is typical for various queue-like use cases, e.g. when tuples
+ *	are constructed, processed and then thrown away.
+ *
+ *	The memory context uses a very simple approach to free space management.
+ *	Instead of a complex global freelist, each block simply tracks the
+ *	number of allocated and freed chunks. The space released by freed chunks
+ *	is not reused, and once all chunks on a block are freed (i.e. when
+ *	nfree == nchunks), the whole block is thrown away. When the allocated
+ *	chunks have similar lifespan, this works very well and is extremely
+ *	cheap.
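+ *
+ *	For example, when decoded tuples are allocated while reading a stream of
+ *	changes and freed shortly after being processed, each block fills up
+ *	once, its nfree counter eventually reaches nchunks, and the whole block
+ *	is returned to malloc() - without any per-chunk free space tracking.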
+ *
+ *	The current implementation only uses a fixed block size - maybe it should
+ *	adopt a min/max block size range, and grow the blocks automatically.
+ *	It already uses dedicated blocks for oversized chunks.
+ *
+ *	XXX It might be possible to improve this by keeping a small freelist for
+ *	only a small number of recent blocks, but it's not clear it's worth the
+ *	additional complexity.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define Generation_BLOCKHDRSZ	MAXALIGN(sizeof(GenerationBlockData))
+#define Generation_CHUNKHDRSZ	MAXALIGN(sizeof(GenerationChunkData))
+
+/* Portion of Generation_CHUNKHDRSZ examined outside Generation.c. */
+#define Generation_CHUNK_PUBLIC	\
+	(offsetof(GenerationChunkData, size) + sizeof(Size))
+
+/* Portion of Generation_CHUNKHDRSZ excluding trailing padding. */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunkData, requested_size) + sizeof(Size))
+#else
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunkData, size) + sizeof(Size))
+#endif
+
+typedef struct GenerationBlockData *GenerationBlock;	/* forward reference */
+typedef struct GenerationChunkData *GenerationChunk;
+
+typedef void *GenerationPointer;
+
+/*
+ * GenerationContext is a simple memory context that does not reuse freed
+ * chunks, and frees whole blocks once all their chunks have been freed.
+ */
+typedef struct GenerationContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+
+	/* Generational context parameters */
+	Size		blockSize;		/* block size */
+
+	GenerationBlock	block;			/* current (most recently allocated) block */
+	dlist_head	blocks;			/* list of blocks */
+
+}	GenerationContext;
+
+typedef GenerationContext *Generation;
+
+/*
+ * GenerationBlockData
+ *		A GenerationBlock is the unit of memory that is obtained by Generation.c
+ *		from malloc().  It contains one or more GenerationChunks, which are
+ *		the units requested by palloc() and freed by pfree().  GenerationChunks
+ *		cannot be returned to malloc() individually, instead pfree()
+ *		updates a free counter on a block and when all chunks on a block
+ *		are freed the whole block is returned to malloc().
+ *
+ *		GenerationBlockData is the header data for a block --- the usable space
+ *		within the block begins at the next alignment boundary.
+ */
+typedef struct GenerationBlockData
+{
+	dlist_node	node;			/* doubly-linked list */
+	int			nchunks;		/* number of chunks in the block */
+	int			nfree;			/* number of free chunks */
+	char	   *freeptr;		/* start of free space in this block */
+	char	   *endptr;			/* end of space in this block */
+}	GenerationBlockData;
+
+/*
+ * GenerationChunk
+ *		The prefix of each piece of memory in a GenerationBlock
+ */
+typedef struct GenerationChunkData
+{
+	/* block owning this chunk */
+	void	   *block;
+
+	/* include StandardChunkHeader because mcxt.c expects that */
+	StandardChunkHeader header;
+
+}	GenerationChunkData;
+
+
+/*
+ * GenerationIsValid
+ *		True iff set is valid allocation set.
+ *		True iff set is a valid generation context.
+#define GenerationIsValid(set) PointerIsValid(set)
+
+#define GenerationPointerGetChunk(ptr) \
+					((GenerationChunk)(((char *)(ptr)) - Generation_CHUNKHDRSZ))
+#define GenerationChunkGetPointer(chk) \
+					((GenerationPointer)(((char *)(chk)) + Generation_CHUNKHDRSZ))
+
+/*
+ * These functions implement the MemoryContext API for Generation contexts.
+ */
+static void *GenerationAlloc(MemoryContext context, Size size);
+static void GenerationFree(MemoryContext context, void *pointer);
+static void *GenerationRealloc(MemoryContext context, void *pointer, Size size);
+static void GenerationInit(MemoryContext context);
+static void GenerationReset(MemoryContext context);
+static void GenerationDelete(MemoryContext context);
+static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
+static bool GenerationIsEmpty(MemoryContext context);
+static void GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void GenerationCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Generation contexts.
+ */
+static MemoryContextMethods GenerationMethods = {
+	GenerationAlloc,
+	GenerationFree,
+	GenerationRealloc,
+	GenerationInit,
+	GenerationReset,
+	GenerationDelete,
+	GenerationGetChunkSpace,
+	GenerationIsEmpty,
+	GenerationStats
+#ifdef MEMORY_CONTEXT_CHECKING
+	,GenerationCheck
+#endif
+};
+
+/* ----------
+ * Debug macros
+ * ----------
+ */
+#ifdef HAVE_ALLOCINFO
+#define GenerationFreeInfo(_cxt, _chunk) \
+			fprintf(stderr, "GenerationFree: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#define GenerationAllocInfo(_cxt, _chunk) \
+			fprintf(stderr, "GenerationAlloc: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#else
+#define GenerationFreeInfo(_cxt, _chunk)
+#define GenerationAllocInfo(_cxt, _chunk)
+#endif
+
+
+/*
+ * Public routines
+ */
+
+
+/*
+ * GenerationContextCreate
+ *		Create a new Generation context.
+ */
+MemoryContext
+GenerationContextCreate(MemoryContext parent,
+				 const char *name,
+				 Size blockSize)
+{
+	Generation			set;
+
+	/*
+	 * First, validate allocation parameters.  (If we're going to throw an
+	 * error, we should do so before the context is created, not after.)  We
+	 * somewhat arbitrarily enforce a minimum 1K block size, mostly because
+	 * that's what AllocSet does.
+	 */
+	if (blockSize != MAXALIGN(blockSize) ||
+		blockSize < 1024 ||
+		!AllocHugeSizeIsValid(blockSize))
+		elog(ERROR, "invalid blockSize for memory context: %zu",
+			 blockSize);
+
+	/* Do the type-independent part of context creation */
+	set = (Generation) MemoryContextCreate(T_GenerationContext,
+									sizeof(GenerationContext),
+									&GenerationMethods,
+									parent,
+									name);
+
+	set->blockSize = blockSize;
+	set->block = NULL;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * GenerationInit
+ *		Context-type-specific initialization routine.
+ */
+static void
+GenerationInit(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+
+	dlist_init(&set->blocks);
+}
+
+/*
+ * GenerationReset
+ *		Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+GenerationReset(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+	dlist_mutable_iter miter;
+
+	AssertArg(GenerationIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Check for corruption and leaks before freeing */
+	GenerationCheck(context);
+#endif
+
+	dlist_foreach_modify(miter, &set->blocks)
+	{
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, miter.cur);
+
+		dlist_delete(miter.cur);
+
+		/* Normal case, release the block */
+#ifdef CLOBBER_FREED_MEMORY
+		wipe_mem(block, set->blockSize);
+#endif
+
+		free(block);
+	}
+
+	set->block = NULL;
+
+	Assert(dlist_is_empty(&set->blocks));
+}
+
+/*
+ * GenerationDelete
+ *		Frees all memory which is allocated in the given set, in preparation
+ *		for deletion of the set. We simply call GenerationReset() which does all the
+ *		dirty work.
+ */
+static void
+GenerationDelete(MemoryContext context)
+{
+	/* just reset (although not really necessary) */
+	GenerationReset(context);
+}
+
+/*
+ * GenerationAlloc
+ *		Returns pointer to allocated memory of given size or NULL if
+ *		request could not be completed; memory is added to the set.
+ *
+ * No request may exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - Generation_BLOCKHDRSZ - Generation_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ */
+static void *
+GenerationAlloc(MemoryContext context, Size size)
+{
+	Generation			set = (Generation) context;
+	GenerationBlock	block;
+	GenerationChunk	chunk;
+	Size		chunk_size = MAXALIGN(size);
+
+	/* is it an over-sized chunk? if yes, allocate special block */
+	if (chunk_size > set->blockSize / 8)
+	{
+		Size		blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ;
+
+		block = (GenerationBlock) malloc(blksize);
+		if (block == NULL)
+			return NULL;
+
+		/* block with a single (used) chunk */
+		block->nchunks = 1;
+		block->nfree = 0;
+
+		/* the block is completely full */
+		block->freeptr = block->endptr = ((char *) block) + blksize;
+
+		chunk = (GenerationChunk) (((char *) block) + Generation_BLOCKHDRSZ);
+		chunk->header.context = (MemoryContext) set;
+		chunk->header.size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+		/* Valgrind: Will be made NOACCESS below. */
+		chunk->header.requested_size = size;
+		/* set mark to catch clobber of "unused" space */
+		if (size < chunk_size)
+			set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* fill the allocated space with junk */
+		randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+		/* add the block to the list of allocated blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		GenerationAllocInfo(set, chunk);
+
+		/*
+		 * Chunk header public fields remain DEFINED.  The requested
+		 * allocation itself can be NOACCESS or UNDEFINED; our caller will
+		 * soon make it UNDEFINED.  Make extra space at the end of the chunk,
+		 * if any, NOACCESS.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS((char *) chunk + Generation_CHUNK_PUBLIC,
+							 chunk_size + Generation_CHUNKHDRSZ - Generation_CHUNK_PUBLIC);
+
+		return GenerationChunkGetPointer(chunk);
+	}
+
+	/*
+	 * Not an over-sized chunk. Is there enough space on the current block? If
+	 * not, allocate a new "regular" block.
+	 */
+	block = set->block;
+
+	if ((block == NULL) ||
+		(block->endptr - block->freeptr) < Generation_CHUNKHDRSZ + chunk_size)
+	{
+		Size		blksize = set->blockSize;
+
+		block = (GenerationBlock) malloc(blksize);
+
+		if (block == NULL)
+			return NULL;
+
+		block->nchunks = 0;
+		block->nfree = 0;
+
+		block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ;
+		block->endptr = ((char *) block) + blksize;
+
+		/* Mark unallocated space NOACCESS. */
+		VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
+								   blksize - Generation_BLOCKHDRSZ);
+
+		/* add it to the doubly-linked list of blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		/* and also use it as the current allocation block */
+		set->block = block;
+	}
+
+	/* we're supposed to have a block with enough free space now */
+	Assert(block != NULL);
+	Assert((block->endptr - block->freeptr) >= Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk = (GenerationChunk) block->freeptr;
+
+	block->nchunks += 1;
+	block->freeptr += (Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk->block = block;
+
+	chunk->header.context = (MemoryContext) set;
+	chunk->header.size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Valgrind: Free list requested_size should be DEFINED. */
+	chunk->header.requested_size = size;
+	VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+							   sizeof(chunk->header.requested_size));
+	/* set mark to catch clobber of "unused" space */
+	if (size < chunk->header.size)
+		set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+	GenerationAllocInfo(set, chunk);
+	return GenerationChunkGetPointer(chunk);
+}
+
+/*
+ * GenerationFree
+ *		Update number of chunks on the block, and if all chunks on the block
+ *		are freeed then discard the block.
+ */
+static void
+GenerationFree(MemoryContext context, void *pointer)
+{
+	Generation			set = (Generation) context;
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+	GenerationBlock	block = chunk->block;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+							  sizeof(chunk->header.requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->header.requested_size < chunk->header.size)
+		if (!sentinel_ok(pointer, chunk->header.requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 set->header.name, chunk);
+#endif
+
+#ifdef CLOBBER_FREED_MEMORY
+	wipe_mem(pointer, chunk->header.size);
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Reset requested_size to 0 in chunks that are on freelist */
+	chunk->header.requested_size = 0;
+#endif
+
+	block->nfree += 1;
+
+	Assert(block->nchunks > 0);
+	Assert(block->nfree <= block->nchunks);
+
+	/* If there are still allocated chunks on the block, we're done. */
+	if (block->nfree < block->nchunks)
+		return;
+
+	/*
+	 * The block is empty, so let's get rid of it. First remove it from the
+	 * list of blocks, then return it to malloc().
+	 */
+	dlist_delete(&block->node);
+
+	/* Also make sure the block is not marked as the current block. */
+	if (set->block == block)
+		set->block = NULL;
+
+	free(block);
+}
+
+/*
+ * GenerationRealloc
+ *		When handling repalloc, we simply allocate a new chunk, copy the data
+ *		and discard the old one. The only exception is when the new size fits
+ *		into the old chunk - in that case we just update chunk header.
+ */
+static void *
+GenerationRealloc(MemoryContext context, void *pointer, Size size)
+{
+	Generation			set = (Generation) context;
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+	Size		oldsize = chunk->header.size;
+	GenerationPointer	newPointer;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+							  sizeof(chunk->header.requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->header.requested_size < oldsize)
+		if (!sentinel_ok(pointer, chunk->header.requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 set->header.name, chunk);
+#endif
+
+	/*
+	 * Maybe the allocated area already is >= the new size.  (In particular,
+	 * we always fall out here if the requested size is a decrease.)
+	 *
+	 * This memory context is not use the power-of-2 chunk sizing and instead
+	 * carves the chunks to be as small as possible, so most repalloc() calls
+	 * will end up in the palloc/memcpy/pfree branch.
+	 *
+	 * XXX Perhaps we should annotate this condition with unlikely()?
+	 */
+	if (oldsize >= size)
+	{
+#ifdef MEMORY_CONTEXT_CHECKING
+		Size		oldrequest = chunk->header.requested_size;
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* We can only fill the extra space if we know the prior request */
+		if (size > oldrequest)
+			randomize_mem((char *) pointer + oldrequest,
+						  size - oldrequest);
+#endif
+
+		chunk->header.requested_size = size;
+		VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+								   sizeof(chunk->header.requested_size));
+
+		/*
+		 * If this is an increase, mark any newly-available part UNDEFINED.
+		 * Otherwise, mark the obsolete part NOACCESS.
+		 */
+		if (size > oldrequest)
+			VALGRIND_MAKE_MEM_UNDEFINED((char *) pointer + oldrequest,
+										size - oldrequest);
+		else
+			VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size,
+									   oldsize - size);
+
+		/* set mark to catch clobber of "unused" space */
+		if (size < oldsize)
+			set_sentinel(pointer, size);
+#else							/* !MEMORY_CONTEXT_CHECKING */
+
+		/*
+		 * We don't have the information to determine whether we're growing
+		 * the old request or shrinking it, so we conservatively mark the
+		 * entire new allocation DEFINED.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS(pointer, oldsize);
+		VALGRIND_MAKE_MEM_DEFINED(pointer, size);
+#endif
+
+		return pointer;
+	}
+
+	/* allocate new chunk */
+	newPointer = GenerationAlloc((MemoryContext) set, size);
+
+	/* leave immediately if request was not completed */
+	if (newPointer == NULL)
+		return NULL;
+
+	/*
+	 * GenerationSetAlloc() just made the region NOACCESS.  Change it to UNDEFINED
+	 * for the moment; memcpy() will then transfer definedness from the old
+	 * allocation to the new.  If we know the old allocation, copy just that
+	 * much.  Otherwise, make the entire old chunk defined to avoid errors as
+	 * we copy the currently-NOACCESS trailing bytes.
+	 */
+	VALGRIND_MAKE_MEM_UNDEFINED(newPointer, size);
+#ifdef MEMORY_CONTEXT_CHECKING
+	oldsize = chunk->header.requested_size;
+#else
+	VALGRIND_MAKE_MEM_DEFINED(pointer, oldsize);
+#endif
+
+	/* transfer existing data (certain to fit) */
+	memcpy(newPointer, pointer, oldsize);
+
+	/* free old chunk */
+	GenerationFree((MemoryContext) set, pointer);
+
+	return newPointer;
+}
+
+/*
+ * GenerationGetChunkSpace
+ *		Given a currently-allocated chunk, determine the total space
+ *		it occupies (including all memory-allocation overhead).
+ */
+static Size
+GenerationGetChunkSpace(MemoryContext context, void *pointer)
+{
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+
+	return chunk->header.size + Generation_CHUNKHDRSZ;
+}
+
+/*
+ * GenerationIsEmpty
+ *		Is an Generation empty of any allocated space?
+ */
+static bool
+GenerationIsEmpty(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+
+	return dlist_is_empty(&set->blocks);
+}
+
+/*
+ * GenerationStats
+ *		Compute stats about memory consumption of an Generation.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Generation into *totals.
+ *
+ * XXX freespace only accounts for empty space at the end of the block, not
+ * space of freed chunks (which is unknown).
+ */
+static void
+GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals)
+{
+	Generation			set = (Generation) context;
+
+	Size		nblocks = 0;
+	Size		nchunks = 0;
+	Size		nfreechunks = 0;
+	Size		totalspace = 0;
+	Size		freespace = 0;
+
+	dlist_iter	iter;
+
+	dlist_foreach(iter, &set->blocks)
+	{
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, iter.cur);
+
+		nblocks++;
+		nchunks += block->nchunks;
+		nfreechunks += block->nfree;
+		totalspace += set->blockSize;
+		freespace += (block->endptr - block->freeptr);
+	}
+
+	if (print)
+	{
+		int			i;
+
+		for (i = 0; i < level; i++)
+			fprintf(stderr, "  ");
+		fprintf(stderr,
+			"Generation: %s: %zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
+				set->header.name, totalspace, nblocks, nchunks, freespace,
+				nfreechunks, totalspace - freespace);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += nblocks;
+		totals->freechunks += nfreechunks;
+		totals->totalspace += totalspace;
+		totals->freespace += freespace;
+	}
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * GenerationCheck
+ *		Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+GenerationCheck(MemoryContext context)
+{
+	Generation			Generation = (Generation) context;
+	char	   *name = Generation->header.name;
+	dlist_iter	iter;
+
+	/* walk all blocks in this context */
+	dlist_foreach(iter, &Generation->blocks)
+	{
+		int			nfree,
+					nchunks;
+		char	   *ptr;
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, iter.cur);
+
+		/* We can't free more chunks than allocated. */
+		if (block->nfree <= block->nchunks)
+			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p exceeds %d allocated",
+				 name, block->nfree, block, block->nchunks);
+
+		/* Now walk through the chunks and count them. */
+		nfree = 0;
+		nchunks = 0;
+		ptr = ((char *) block) + Generation_BLOCKHDRSZ;
+
+		while (ptr < block->freeptr)
+		{
+			GenerationChunk	chunk = (GenerationChunk)ptr;
+
+			/* move to the next chunk */
+			ptr += (chunk->header.size + Generation_CHUNKHDRSZ);
+
+			/* chunks have both block and context pointers, so check both */
+			if (chunk->block != block)
+				elog(WARNING, "problem in Generation %s: bogus block link in block %p, chunk %p",
+					 name, block, chunk);
+
+			if (chunk->header.context != (MemoryContext) Generation)
+				elog(WARNING, "problem in Generation %s: bogus context link in block %p, chunk %p",
+					 name, block, chunk);
+
+			nchunks += 1;
+
+			/* if requested_size==0, the chunk was freed */
+			if (chunk->header.requested_size > 0)
+			{
+				/* if the chunk was not freed, we can trigger valgrind checks */
+				VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+									   sizeof(chunk->header.requested_size));
+
+				/* we're in a no-freelist branch */
+				VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+									   sizeof(chunk->header.requested_size));
+
+				/* now make sure the chunk size is correct */
+				if (chunk->header.size != MAXALIGN(chunk->header.requested_size))
+					elog(WARNING, "problem in Generation %s: bogus chunk size in block %p, chunk %p",
+						 name, block, chunk);
+
+				/* there might be sentinel (thanks to alignment) */
+				if (chunk->header.requested_size < chunk->header.size &&
+					!sentinel_ok(chunk, Generation_CHUNKHDRSZ + chunk->header.requested_size))
+					elog(WARNING, "problem in Generation %s: detected write past chunk end in block %p, chunk %p",
+						 name, block, chunk);
+			}
+			else
+				nfree += 1;
+		}
+
+		/*
+		 * Make sure we got the expected number of allocated and free chunks
+		 * (as tracked in the block header).
+		 */
+		if (nchunks != block->nchunks)
+			elog(WARNING, "problem in Generation %s: number of allocated chunks %d in block %p does not match header %d",
+				 name, nchunks, block, block->nchunks);
+
+		if (nfree != block->nfree)
+			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p does not match header %d",
+				 name, nfree, block, block->nfree);
+	}
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
diff --git a/src/backend/utils/mmgr/generation.c b/src/backend/utils/mmgr/generation.c
new file mode 100644
index 0000000..7a25eb2
--- /dev/null
+++ b/src/backend/utils/mmgr/generation.c
@@ -0,0 +1,758 @@
+/*-------------------------------------------------------------------------
+ *
+ * generation.c
+ *	  Generational allocator definitions.
+ *
+ * Generation is a custom MemoryContext implementation designed for cases where
+ * the allocated chunks have similar lifespan.
+ *
+ * Portions Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mmgr/generation.c
+ *
+ *
+ *	This memory context is based on the assumption that the allocated chunks
+ *	have similar lifespan, i.e. that chunks allocated close to each other
+ *	(in time) will also be freed in close proximity, and mostly in the same
+ *	order.  This is typical for various queue-like use cases, e.g. when tuples
+ *	are constructed, processed and then thrown away.
+ *
+ *	The memory context uses a very simple approach to free space management.
+ *	Instead of a complex global freelist, each block tracks the number
+ *	of allocated and freed chunks.  The space released by freed chunks is not
+ *	reused, and once all chunks in a block are freed (i.e. when nfree == nchunks),
+ *	the whole block is thrown away. When the allocated chunks have similar
+ *	lifespan, this works very well and is extremely cheap.
+ *
+ *	The current implementation only uses a fixed block size - maybe it should
+ *	adopt a min/max block size range, and grow the blocks automatically.
+ *	It already uses dedicated blocks for oversized chunks.
+ *
+ *	XXX It might be possible to improve this by keeping a small freelist for
+ *	only a small number of recent blocks, but it's not clear it's worth the
+ *	additional complexity.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/memdebug.h"
+#include "utils/memutils.h"
+#include "lib/ilist.h"
+
+
+#define Generation_BLOCKHDRSZ	MAXALIGN(sizeof(GenerationBlockData))
+#define Generation_CHUNKHDRSZ	MAXALIGN(sizeof(GenerationChunkData))
+
+/* Portion of Generation_CHUNKHDRSZ examined outside generation.c. */
+#define Generation_CHUNK_PUBLIC	\
+	(offsetof(GenerationChunkData, header.size) + sizeof(Size))
+
+/* Portion of Generation_CHUNKHDRSZ excluding trailing padding. */
+#ifdef MEMORY_CONTEXT_CHECKING
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunkData, header.requested_size) + sizeof(Size))
+#else
+#define Generation_CHUNK_USED	\
+	(offsetof(GenerationChunkData, header.size) + sizeof(Size))
+#endif
+
+typedef struct GenerationBlockData *GenerationBlock;	/* forward reference */
+typedef struct GenerationChunkData *GenerationChunk;
+
+typedef void *GenerationPointer;
+
+/*
+ * GenerationContext is a simple memory context that does not reuse the space of
+ * freed chunks, and instead frees whole blocks once all chunks in them are freed.
+ */
+typedef struct GenerationContext
+{
+	MemoryContextData header;	/* Standard memory-context fields */
+
+	/* Generational context parameters */
+	Size		blockSize;		/* block size */
+
+	GenerationBlock	block;			/* current (most recently allocated) block */
+	dlist_head	blocks;			/* list of blocks */
+
+}	GenerationContext;
+
+typedef GenerationContext *Generation;
+
+/*
+ * GenerationBlockData
+ *		A GenerationBlock is the unit of memory that is obtained by generation.c
+ *		from malloc().  It contains one or more GenerationChunks, which are
+ *		the units requested by palloc() and freed by pfree().  GenerationChunks
+ *		cannot be returned to malloc() individually; instead, pfree() updates
+ *		the free counter of the owning block, and once all chunks on a block
+ *		are freed the whole block is returned to malloc().
+ *
+ *		GenerationBlockData is the header data for a block --- the usable space
+ *		within the block begins at the next alignment boundary.
+ */
+typedef struct GenerationBlockData
+{
+	dlist_node	node;			/* doubly-linked list */
+	int			nchunks;		/* number of chunks in the block */
+	int			nfree;			/* number of free chunks */
+	char	   *freeptr;		/* start of free space in this block */
+	char	   *endptr;			/* end of space in this block */
+}	GenerationBlockData;
+
+/*
+ * GenerationChunk
+ *		The prefix of each piece of memory in a GenerationBlock
+ */
+typedef struct GenerationChunkData
+{
+	/* block owning this chunk */
+	void	   *block;
+
+	/* include StandardChunkHeader because mcxt.c expects that */
+	StandardChunkHeader header;
+
+}	GenerationChunkData;
+
+
+/*
+ * GenerationIsValid
+ *		True iff set is a valid generation set.
+ */
+#define GenerationIsValid(set) PointerIsValid(set)
+
+#define GenerationPointerGetChunk(ptr) \
+					((GenerationChunk)(((char *)(ptr)) - Generation_CHUNKHDRSZ))
+#define GenerationChunkGetPointer(chk) \
+					((GenerationPointer)(((char *)(chk)) + Generation_CHUNKHDRSZ))
+
+/*
+ * These functions implement the MemoryContext API for Generation contexts.
+ */
+static void *GenerationAlloc(MemoryContext context, Size size);
+static void GenerationFree(MemoryContext context, void *pointer);
+static void *GenerationRealloc(MemoryContext context, void *pointer, Size size);
+static void GenerationInit(MemoryContext context);
+static void GenerationReset(MemoryContext context);
+static void GenerationDelete(MemoryContext context);
+static Size GenerationGetChunkSpace(MemoryContext context, void *pointer);
+static bool GenerationIsEmpty(MemoryContext context);
+static void GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals);
+
+#ifdef MEMORY_CONTEXT_CHECKING
+static void GenerationCheck(MemoryContext context);
+#endif
+
+/*
+ * This is the virtual function table for Generation contexts.
+ */
+static MemoryContextMethods GenerationMethods = {
+	GenerationAlloc,
+	GenerationFree,
+	GenerationRealloc,
+	GenerationInit,
+	GenerationReset,
+	GenerationDelete,
+	GenerationGetChunkSpace,
+	GenerationIsEmpty,
+	GenerationStats
+#ifdef MEMORY_CONTEXT_CHECKING
+	,GenerationCheck
+#endif
+};
+
+/* ----------
+ * Debug macros
+ * ----------
+ */
+#ifdef HAVE_ALLOCINFO
+#define GenerationFreeInfo(_cxt, _chunk) \
+			fprintf(stderr, "GenerationFree: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#define GenerationAllocInfo(_cxt, _chunk) \
+			fprintf(stderr, "GenerationAlloc: %s: %p, %lu\n", \
+				(_cxt)->header.name, (_chunk), (_chunk)->size)
+#else
+#define GenerationFreeInfo(_cxt, _chunk)
+#define GenerationAllocInfo(_cxt, _chunk)
+#endif
+
+
+/*
+ * Public routines
+ */
+
+
+/*
+ * GenerationContextCreate
+ *		Create a new Generation context.
+ */
+MemoryContext
+GenerationContextCreate(MemoryContext parent,
+				 const char *name,
+				 Size blockSize)
+{
+	Generation			set;
+
+	/*
+	 * First, validate allocation parameters.  (If we're going to throw an
+	 * error, we should do so before the context is created, not after.)  We
+	 * somewhat arbitrarily enforce a minimum 1K block size, mostly because
+	 * that's what AllocSet does.
+	 */
+	if (blockSize != MAXALIGN(blockSize) ||
+		blockSize < 1024 ||
+		!AllocHugeSizeIsValid(blockSize))
+		elog(ERROR, "invalid blockSize for memory context: %zu",
+			 blockSize);
+
+	/* Do the type-independent part of context creation */
+	set = (Generation) MemoryContextCreate(T_GenerationContext,
+									sizeof(GenerationContext),
+									&GenerationMethods,
+									parent,
+									name);
+
+	set->blockSize = blockSize;
+	set->block = NULL;
+
+	return (MemoryContext) set;
+}
+
+/*
+ * GenerationInit
+ *		Context-type-specific initialization routine.
+ */
+static void
+GenerationInit(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+
+	dlist_init(&set->blocks);
+}
+
+/*
+ * GenerationReset
+ *		Frees all memory which is allocated in the given set.
+ *
+ * The code simply frees all the blocks in the context - we don't keep any
+ * keeper blocks or anything like that.
+ */
+static void
+GenerationReset(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+	dlist_mutable_iter miter;
+
+	AssertArg(GenerationIsValid(set));
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Check for corruption and leaks before freeing */
+	GenerationCheck(context);
+#endif
+
+	dlist_foreach_modify(miter, &set->blocks)
+	{
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, miter.cur);
+
+		dlist_delete(miter.cur);
+
+		/* Normal case, release the block */
+#ifdef CLOBBER_FREED_MEMORY
+		wipe_mem(block, set->blockSize);
+#endif
+
+		free(block);
+	}
+
+	set->block = NULL;
+
+	Assert(dlist_is_empty(&set->blocks));
+}
+
+/*
+ * GenerationDelete
+ *		Frees all memory which is allocated in the given set, in preparation
+ *		for deletion of the set. We simply call GenerationReset() which does all the
+ *		dirty work.
+ */
+static void
+GenerationDelete(MemoryContext context)
+{
+	/* just reset (although not really necessary) */
+	GenerationReset(context);
+}
+
+/*
+ * GenerationAlloc
+ *		Returns pointer to allocated memory of given size or NULL if
+ *		request could not be completed; memory is added to the set.
+ *
+ * No request may exceed:
+ *		MAXALIGN_DOWN(SIZE_MAX) - Generation_BLOCKHDRSZ - Generation_CHUNKHDRSZ
+ * All callers use a much-lower limit.
+ */
+static void *
+GenerationAlloc(MemoryContext context, Size size)
+{
+	Generation			set = (Generation) context;
+	GenerationBlock	block;
+	GenerationChunk	chunk;
+	Size		chunk_size = MAXALIGN(size);
+
+	/* is it an over-sized chunk? if yes, allocate special block */
+	if (chunk_size > set->blockSize / 8)
+	{
+		Size		blksize = chunk_size + Generation_BLOCKHDRSZ + Generation_CHUNKHDRSZ;
+
+		block = (GenerationBlock) malloc(blksize);
+		if (block == NULL)
+			return NULL;
+
+		/* block with a single (used) chunk */
+		block->nchunks = 1;
+		block->nfree = 0;
+
+		/* the block is completely full */
+		block->freeptr = block->endptr = ((char *) block) + blksize;
+
+		chunk = (GenerationChunk) (((char *) block) + Generation_BLOCKHDRSZ);
+		chunk->block = block;
+		chunk->header.context = (MemoryContext) set;
+		chunk->header.size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+		/* Valgrind: Will be made NOACCESS below. */
+		chunk->header.requested_size = size;
+		/* set mark to catch clobber of "unused" space */
+		if (size < chunk_size)
+			set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* fill the allocated space with junk */
+		randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+		/* add the block to the list of allocated blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		GenerationAllocInfo(set, chunk);
+
+		/*
+		 * Chunk header public fields remain DEFINED.  The requested
+		 * allocation itself can be NOACCESS or UNDEFINED; our caller will
+		 * soon make it UNDEFINED.  Make extra space at the end of the chunk,
+		 * if any, NOACCESS.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS((char *) chunk + Generation_CHUNK_PUBLIC,
+							 chunk_size + Generation_CHUNKHDRSZ - Generation_CHUNK_PUBLIC);
+
+		return GenerationChunkGetPointer(chunk);
+	}
+
+	/*
+	 * Not an over-sized chunk. Is there enough space on the current block? If
+	 * not, allocate a new "regular" block.
+	 */
+	block = set->block;
+
+	if ((block == NULL) ||
+		(block->endptr - block->freeptr) < Generation_CHUNKHDRSZ + chunk_size)
+	{
+		Size		blksize = set->blockSize;
+
+		block = (GenerationBlock) malloc(blksize);
+
+		if (block == NULL)
+			return NULL;
+
+		block->nchunks = 0;
+		block->nfree = 0;
+
+		block->freeptr = ((char *) block) + Generation_BLOCKHDRSZ;
+		block->endptr = ((char *) block) + blksize;
+
+		/* Mark unallocated space NOACCESS. */
+		VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
+								   blksize - Generation_BLOCKHDRSZ);
+
+		/* add it to the doubly-linked list of blocks */
+		dlist_push_head(&set->blocks, &block->node);
+
+		/* and also use it as the current allocation block */
+		set->block = block;
+	}
+
+	/* we're supposed to have a block with enough free space now */
+	Assert(block != NULL);
+	Assert((block->endptr - block->freeptr) >= Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk = (GenerationChunk) block->freeptr;
+
+	block->nchunks += 1;
+	block->freeptr += (Generation_CHUNKHDRSZ + chunk_size);
+
+	chunk->block = block;
+
+	chunk->header.context = (MemoryContext) set;
+	chunk->header.size = chunk_size;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Valgrind: requested_size is made NOACCESS just below. */
+	chunk->header.requested_size = size;
+	VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+							   sizeof(chunk->header.requested_size));
+	/* set mark to catch clobber of "unused" space */
+	if (size < chunk->header.size)
+		set_sentinel(GenerationChunkGetPointer(chunk), size);
+#endif
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+	/* fill the allocated space with junk */
+	randomize_mem((char *) GenerationChunkGetPointer(chunk), size);
+#endif
+
+	GenerationAllocInfo(set, chunk);
+	return GenerationChunkGetPointer(chunk);
+}
+
+/*
+ * GenerationFree
+ *		Update the number of freed chunks on the block, and if all chunks on
+ *		the block are now freed, discard the whole block.
+ */
+static void
+GenerationFree(MemoryContext context, void *pointer)
+{
+	Generation			set = (Generation) context;
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+	GenerationBlock	block = chunk->block;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+							  sizeof(chunk->header.requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->header.requested_size < chunk->header.size)
+		if (!sentinel_ok(pointer, chunk->header.requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 set->header.name, chunk);
+#endif
+
+#ifdef CLOBBER_FREED_MEMORY
+	wipe_mem(pointer, chunk->header.size);
+#endif
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	/* Reset requested_size to 0 to mark the chunk as freed */
+	chunk->header.requested_size = 0;
+#endif
+
+	block->nfree += 1;
+
+	Assert(block->nchunks > 0);
+	Assert(block->nfree <= block->nchunks);
+
+	/* If there are still allocated chunks on the block, we're done. */
+	if (block->nfree < block->nchunks)
+		return;
+
+	/*
+	 * The block is empty, so let's get rid of it. First remove it from the
+	 * list of blocks, then return it to malloc().
+	 */
+	dlist_delete(&block->node);
+
+	/* Also make sure the block is not marked as the current block. */
+	if (set->block == block)
+		set->block = NULL;
+
+	free(block);
+}
+
+/*
+ * GenerationRealloc
+ *		When handling repalloc, we simply allocate a new chunk, copy the data
+ *		and discard the old one. The only exception is when the new size fits
+ *		into the old chunk - in that case we just update the chunk header.
+ */
+static void *
+GenerationRealloc(MemoryContext context, void *pointer, Size size)
+{
+	Generation			set = (Generation) context;
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+	Size		oldsize = chunk->header.size;
+	GenerationPointer	newPointer;
+
+#ifdef MEMORY_CONTEXT_CHECKING
+	VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+							  sizeof(chunk->header.requested_size));
+	/* Test for someone scribbling on unused space in chunk */
+	if (chunk->header.requested_size < oldsize)
+		if (!sentinel_ok(pointer, chunk->header.requested_size))
+			elog(WARNING, "detected write past chunk end in %s %p",
+				 set->header.name, chunk);
+#endif
+
+	/*
+	 * Maybe the allocated area already is >= the new size.  (In particular,
+	 * we always fall out here if the requested size is a decrease.)
+	 *
+	 * This memory context does not use power-of-2 chunk sizing and instead
+	 * carves the chunks to be as small as possible, so most repalloc() calls
+	 * will end up in the palloc/memcpy/pfree branch.
+	 *
+	 * XXX Perhaps we should annotate this condition with unlikely()?
+	 */
+	if (oldsize >= size)
+	{
+#ifdef MEMORY_CONTEXT_CHECKING
+		Size		oldrequest = chunk->header.requested_size;
+
+#ifdef RANDOMIZE_ALLOCATED_MEMORY
+		/* We can only fill the extra space if we know the prior request */
+		if (size > oldrequest)
+			randomize_mem((char *) pointer + oldrequest,
+						  size - oldrequest);
+#endif
+
+		chunk->header.requested_size = size;
+		VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+								   sizeof(chunk->header.requested_size));
+
+		/*
+		 * If this is an increase, mark any newly-available part UNDEFINED.
+		 * Otherwise, mark the obsolete part NOACCESS.
+		 */
+		if (size > oldrequest)
+			VALGRIND_MAKE_MEM_UNDEFINED((char *) pointer + oldrequest,
+										size - oldrequest);
+		else
+			VALGRIND_MAKE_MEM_NOACCESS((char *) pointer + size,
+									   oldsize - size);
+
+		/* set mark to catch clobber of "unused" space */
+		if (size < oldsize)
+			set_sentinel(pointer, size);
+#else							/* !MEMORY_CONTEXT_CHECKING */
+
+		/*
+		 * We don't have the information to determine whether we're growing
+		 * the old request or shrinking it, so we conservatively mark the
+		 * entire new allocation DEFINED.
+		 */
+		VALGRIND_MAKE_MEM_NOACCESS(pointer, oldsize);
+		VALGRIND_MAKE_MEM_DEFINED(pointer, size);
+#endif
+
+		return pointer;
+	}
+
+	/* allocate new chunk */
+	newPointer = GenerationAlloc((MemoryContext) set, size);
+
+	/* leave immediately if request was not completed */
+	if (newPointer == NULL)
+		return NULL;
+
+	/*
+	 * GenerationAlloc() just made the region NOACCESS.  Change it to UNDEFINED
+	 * for the moment; memcpy() will then transfer definedness from the old
+	 * allocation to the new.  If we know the old allocation, copy just that
+	 * much.  Otherwise, make the entire old chunk defined to avoid errors as
+	 * we copy the currently-NOACCESS trailing bytes.
+	 */
+	VALGRIND_MAKE_MEM_UNDEFINED(newPointer, size);
+#ifdef MEMORY_CONTEXT_CHECKING
+	oldsize = chunk->header.requested_size;
+#else
+	VALGRIND_MAKE_MEM_DEFINED(pointer, oldsize);
+#endif
+
+	/* transfer existing data (certain to fit) */
+	memcpy(newPointer, pointer, oldsize);
+
+	/* free old chunk */
+	GenerationFree((MemoryContext) set, pointer);
+
+	return newPointer;
+}
+
+/*
+ * GenerationGetChunkSpace
+ *		Given a currently-allocated chunk, determine the total space
+ *		it occupies (including all memory-allocation overhead).
+ */
+static Size
+GenerationGetChunkSpace(MemoryContext context, void *pointer)
+{
+	GenerationChunk	chunk = GenerationPointerGetChunk(pointer);
+
+	return chunk->header.size + Generation_CHUNKHDRSZ;
+}
+
+/*
+ * GenerationIsEmpty
+ *		Is a Generation context empty of any allocated space?
+ */
+static bool
+GenerationIsEmpty(MemoryContext context)
+{
+	Generation			set = (Generation) context;
+
+	return dlist_is_empty(&set->blocks);
+}
+
+/*
+ * GenerationStats
+ *		Compute stats about memory consumption of a Generation context.
+ *
+ * level: recursion level (0 at top level); used for print indentation.
+ * print: true to print stats to stderr.
+ * totals: if not NULL, add stats about this Generation into *totals.
+ *
+ * XXX freespace only accounts for empty space at the end of the block, not
+ * space of freed chunks (which is unknown).  Likewise, totalspace charges
+ * every block as blockSize, even though blocks dedicated to over-sized
+ * chunks may have a different size.
+ */
+static void
+GenerationStats(MemoryContext context, int level, bool print,
+		 MemoryContextCounters *totals)
+{
+	Generation			set = (Generation) context;
+
+	Size		nblocks = 0;
+	Size		nchunks = 0;
+	Size		nfreechunks = 0;
+	Size		totalspace = 0;
+	Size		freespace = 0;
+
+	dlist_iter	iter;
+
+	dlist_foreach(iter, &set->blocks)
+	{
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, iter.cur);
+
+		nblocks++;
+		nchunks += block->nchunks;
+		nfreechunks += block->nfree;
+		totalspace += set->blockSize;
+		freespace += (block->endptr - block->freeptr);
+	}
+
+	if (print)
+	{
+		int			i;
+
+		for (i = 0; i < level; i++)
+			fprintf(stderr, "  ");
+		fprintf(stderr,
+			"Generation: %s: %zu total in %zd blocks (%zd chunks); %zu free (%zd chunks); %zu used\n",
+				set->header.name, totalspace, nblocks, nchunks, freespace,
+				nfreechunks, totalspace - freespace);
+	}
+
+	if (totals)
+	{
+		totals->nblocks += nblocks;
+		totals->freechunks += nfreechunks;
+		totals->totalspace += totalspace;
+		totals->freespace += freespace;
+	}
+}
+
+
+#ifdef MEMORY_CONTEXT_CHECKING
+
+/*
+ * GenerationCheck
+ *		Walk through chunks and check consistency of memory.
+ *
+ * NOTE: report errors as WARNING, *not* ERROR or FATAL.  Otherwise you'll
+ * find yourself in an infinite loop when trouble occurs, because this
+ * routine will be entered again when elog cleanup tries to release memory!
+ */
+static void
+GenerationCheck(MemoryContext context)
+{
+	Generation	gen = (Generation) context;
+	char	   *name = gen->header.name;
+	dlist_iter	iter;
+
+	/* walk all blocks in this context */
+	dlist_foreach(iter, &gen->blocks)
+	{
+		int			nfree,
+					nchunks;
+		char	   *ptr;
+		GenerationBlock	block = dlist_container(GenerationBlockData, node, iter.cur);
+
+		/* We can't free more chunks than allocated. */
+		if (block->nfree > block->nchunks)
+			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p exceeds %d allocated",
+				 name, block->nfree, block, block->nchunks);
+
+		/* Now walk through the chunks and count them. */
+		nfree = 0;
+		nchunks = 0;
+		ptr = ((char *) block) + Generation_BLOCKHDRSZ;
+
+		while (ptr < block->freeptr)
+		{
+			GenerationChunk	chunk = (GenerationChunk)ptr;
+
+			/* move to the next chunk */
+			ptr += (chunk->header.size + Generation_CHUNKHDRSZ);
+
+			/* chunks have both block and context pointers, so check both */
+			if (chunk->block != block)
+				elog(WARNING, "problem in Generation %s: bogus block link in block %p, chunk %p",
+					 name, block, chunk);
+
+			if (chunk->header.context != (MemoryContext) gen)
+				elog(WARNING, "problem in Generation %s: bogus context link in block %p, chunk %p",
+					 name, block, chunk);
+
+			nchunks += 1;
+
+			/*
+			 * Make requested_size readable: for chunks that are still
+			 * allocated it is kept NOACCESS when running under Valgrind.
+			 */
+			VALGRIND_MAKE_MEM_DEFINED(&chunk->header.requested_size,
+									  sizeof(chunk->header.requested_size));
+
+			/* if requested_size==0, the chunk was freed */
+			if (chunk->header.requested_size > 0)
+			{
+				/* make sure the chunk size is correct */
+				if (chunk->header.size != MAXALIGN(chunk->header.requested_size))
+					elog(WARNING, "problem in Generation %s: bogus chunk size in block %p, chunk %p",
+						 name, block, chunk);
+
+				/* there might be sentinel (thanks to alignment) */
+				if (chunk->header.requested_size < chunk->header.size &&
+					!sentinel_ok(chunk, Generation_CHUNKHDRSZ + chunk->header.requested_size))
+					elog(WARNING, "problem in Generation %s: detected write past chunk end in block %p, chunk %p",
+						 name, block, chunk);
+
+				/* restore the NOACCESS state set up by GenerationAlloc */
+				VALGRIND_MAKE_MEM_NOACCESS(&chunk->header.requested_size,
+									   sizeof(chunk->header.requested_size));
+			}
+			else
+				nfree += 1;
+		}
+
+		/*
+		 * Make sure we got the expected number of allocated and free chunks
+		 * (as tracked in the block header).
+		 */
+		if (nchunks != block->nchunks)
+			elog(WARNING, "problem in Generation %s: number of allocated chunks %d in block %p does not match header %d",
+				 name, nchunks, block, block->nchunks);
+
+		if (nfree != block->nfree)
+			elog(WARNING, "problem in Generation %s: number of free chunks %d in block %p does not match header %d",
+				 name, nfree, block, block->nfree);
+	}
+}
+
+#endif   /* MEMORY_CONTEXT_CHECKING */
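
To make the intended usage pattern from the header comment a bit more concrete, here is a minimal caller-side sketch (not part of the patch; the function name, context name and sizes are made up, and it obviously only runs inside a backend):

    #include "postgres.h"
    #include "utils/memutils.h"

    static void
    generation_example(void)
    {
        MemoryContext gencxt;
        MemoryContext oldcxt;
        char       *items[1000];
        int         i;

        /* blockSize is arbitrary, but must be MAXALIGN'ed and at least 1kB */
        gencxt = GenerationContextCreate(CurrentMemoryContext,
                                         "generation example", 8 * 1024);

        oldcxt = MemoryContextSwitchTo(gencxt);

        /* allocate a batch of chunks with similar lifespan */
        for (i = 0; i < 1000; i++)
            items[i] = palloc(64);

        /* growing a chunk allocates a new one; the old space is not reused */
        items[0] = repalloc(items[0], 128);

        MemoryContextSwitchTo(oldcxt);

        /*
         * Free the chunks in (roughly) allocation order; each block is
         * returned to malloc() as soon as its last remaining chunk is freed.
         */
        for (i = 0; i < 1000; i++)
            pfree(items[i]);

        /* dropping the context releases anything still left in it */
        MemoryContextDelete(gencxt);
    }

Note that pfree() never makes the freed space available to later allocations in the same context; it only decrements the per-block counter, which is what keeps the allocation and free paths so cheap.
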
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index fe6bc90..815a52a 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -96,6 +96,8 @@ typedef struct MemoryContextData
  */
 #define MemoryContextIsValid(context) \
 	((context) != NULL && \
-	 (IsA((context), AllocSetContext) || IsA((context), SlabContext)))
+	 (IsA((context), AllocSetContext) || \
+	  IsA((context), SlabContext) || \
+	  IsA((context), GenerationContext)))
 
 #endif   /* MEMNODES_H */
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 28aca92..2ef935a 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -279,6 +279,7 @@ typedef enum NodeTag
 	T_MemoryContext,
 	T_AllocSetContext,
 	T_SlabContext,
+	T_GenerationContext,
 
 	/*
 	 * TAGS FOR VALUE NODES (value.h)
diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h
index c931e83..83fd885 100644
--- a/src/include/replication/reorderbuffer.h
+++ b/src/include/replication/reorderbuffer.h
@@ -337,20 +337,6 @@ struct ReorderBuffer
 	MemoryContext txn_context;
 	MemoryContext tup_context;
 
-	/*
-	 * Data structure slab cache.
-	 *
-	 * We allocate/deallocate some structures very frequently, to avoid bigger
-	 * overhead we cache some unused ones here.
-	 *
-	 * The maximum number of cached entries is controlled by const variables
-	 * on top of reorderbuffer.c
-	 */
-
-	/* cached ReorderBufferTupleBufs */
-	slist_head	cached_tuplebufs;
-	Size		nr_cached_tuplebufs;
-
 	XLogRecPtr	current_restart_decoding_lsn;
 
 	/* buffer for disk<->memory conversions */
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 5223a4d..57fb47e 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -141,6 +141,11 @@ extern MemoryContext SlabContextCreate(MemoryContext parent,
 				  Size blockSize,
 				  Size chunkSize);
 
+/* generation.c */
+extern MemoryContext GenerationContextCreate(MemoryContext parent,
+				 const char *name,
+				 Size blockSize);
+
 /*
  * Recommended default alloc parameters, suitable for "ordinary" contexts
  * that might hold quite a lot of data.
-- 
2.5.5
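
For a quick feel of the over-sized chunk rule in GenerationAlloc, here is a small standalone sketch of the same decision (nothing here is backend code; the 8kB block size is just an example, and MAXALIGN is approximated with an assumed 8-byte alignment, as on typical 64-bit builds):

    #include <stdio.h>
    #include <stddef.h>

    /* assumed alignment; the backend derives the real value from configure */
    #define MAXIMUM_ALIGNOF 8
    #define MAXALIGN(LEN) \
        (((size_t) (LEN) + (MAXIMUM_ALIGNOF - 1)) & ~((size_t) (MAXIMUM_ALIGNOF - 1)))

    int
    main(void)
    {
        size_t      blockSize = 8 * 1024;
        size_t      requests[] = {24, 100, 1000, 1024, 1028, 4096};
        int         i;

        for (i = 0; i < sizeof(requests) / sizeof(requests[0]); i++)
        {
            size_t      chunk_size = MAXALIGN(requests[i]);

            /* same test as GenerationAlloc: large chunks get their own block */
            if (chunk_size > blockSize / 8)
                printf("%zu bytes -> dedicated block\n", requests[i]);
            else
                printf("%zu bytes -> carved from a shared %zu-byte block\n",
                       requests[i], blockSize);
        }
        return 0;
    }

So with an 8kB block, anything above 1kB (after MAXALIGN) bypasses the shared blocks entirely, which should keep the per-block free counting meaningful for the small, short-lived allocations the context is aimed at.
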

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers
