diff --git a/src/backend/lib/Makefile b/src/backend/lib/Makefile
index 9dad31398a..ead0755d25 100644
--- a/src/backend/lib/Makefile
+++ b/src/backend/lib/Makefile
@@ -22,6 +22,9 @@ OBJS = \
 	integerset.o \
 	knapsack.o \
 	pairingheap.o \
+	radixtree.o \
 	rbtree.o \
 
+radixtree.o: CFLAGS+=-msse2
+
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/lib/radixtree.c b/src/backend/lib/radixtree.c
new file mode 100644
index 0000000000..f1118679d6
--- /dev/null
+++ b/src/backend/lib/radixtree.c
@@ -0,0 +1,2040 @@
+/*-------------------------------------------------------------------------
+ *
+ * radixtree.c
+ *		Implementation for adaptive radix tree.
+ *
+ * This module employs the idea from the paper "The Adaptive Radix Tree: ARTful
+ * Indexing for Main-Memory Databases" by Viktor Leis, Alfons Kemper, and Thomas
+ * Neumann, 2013. The radix tree uses adaptive node sizes: a small number of
+ * node types, each with a different number of elements. Depending on the
+ * number of children, the appropriate node type is used.
+ *
+ * There are some differences from the proposed implementation.  For instance,
+ * there is no support for path compression and lazy path expansion.  Also,
+ * this radix tree module uses SSE2 intrinsics (128-bit wide SIMD vectors) to
+ * search within a node, where available.  The radix tree supports only
+ * fixed-length 64-bit keys, so we don't expect the tree to become very deep.
+ *
+ * The key is a 64-bit unsigned integer and the value is a Datum. Both internal
+ * nodes and leaf nodes have the same structure.  Internal tree nodes
+ * (shift > 0) store pointers to their child nodes as the values.  Leaf nodes
+ * (shift == 0) store the Datum values specified by the user.  The
+ * paper refers to this technique as "Multi-value leaves".  We choose it for
+ * simplicity and to avoid an additional pointer traversal.  It is the reason
+ * this code currently does not support variable-length keys.
+ *
+ * XXX: radix tree nodes are never shrunk.
+ *
+ * Interface
+ * ---------
+ *
+ * rt_create		- Create a new, empty radix tree
+ * rt_free			- Free the radix tree
+ * rt_search		- Search a key-value pair
+ * rt_insert		- Insert a key-value pair
+ * rt_delete		- Delete a key-value pair
+ * rt_begin_iterate	- Begin iterating through all key-value pairs
+ * rt_iterate_next	- Return next key-value pair, if any
+ * rt_end_iterate	- End iteration
+ *
+ * rt_create() creates an empty radix tree in the given memory context, along
+ * with child memory contexts for each kind of radix tree node.
+ *
+ * rt_iterate_next() returns key-value pairs in ascending order of the key.
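+ *
+ * A minimal usage sketch (illustrative only; the Datum encoding and the
+ * memory context are up to the caller, not requirements of this module):
+ *
+ *		radix_tree *tree = rt_create(CurrentMemoryContext);
+ *		uint64		key;
+ *		Datum		val;
+ *		bool		found;
+ *		rt_iter    *iter;
+ *
+ *		rt_insert(tree, UINT64CONST(42), UInt64GetDatum(4200), &found);
+ *		if (rt_search(tree, UINT64CONST(42), &val))
+ *			elog(DEBUG1, "found " UINT64_FORMAT, DatumGetUInt64(val));
+ *
+ *		iter = rt_begin_iterate(tree);
+ *		while (rt_iterate_next(iter, &key, &val))
+ *			... visit each key-value pair in ascending key order ...
+ *		rt_end_iterate(iter);
+ *
+ *		rt_free(tree);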
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/lib/radixtree.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
+#include "utils/memutils.h"
+#include "lib/radixtree.h"
+#include "lib/stringinfo.h"
+
+#if defined(__SSE2__)
+#include <emmintrin.h>          /* SSE2 intrinsics */
+#endif
+
+/* The number of bits encoded in one tree level */
+#define RT_NODE_SPAN	BITS_PER_BYTE
+
+/* The maximum number of slots in a node */
+#define RT_NODE_MAX_SLOTS (1 << RT_NODE_SPAN)
+
+/*
+ * Return the number of bytes required for an is-set bitmap covering nslots
+ * slots, used by node-128 and node-256.
+ */
+#define RT_NODE_NSLOTS_BITS(nslots) ((nslots) / (sizeof(uint8) * BITS_PER_BYTE))
+
+/* Mask for extracting a chunk from the key */
+#define RT_CHUNK_MASK ((1 << RT_NODE_SPAN) - 1)
+
+/* Maximum shift the radix tree uses */
+#define RT_MAX_SHIFT	key_get_shift(UINT64_MAX)
+
+/* Maximum number of levels the radix tree can have */
+#define RT_MAX_LEVEL	((sizeof(uint64) * BITS_PER_BYTE) / RT_NODE_SPAN)
+
+/* Get a chunk from the key */
+#define RT_GET_KEY_CHUNK(key, shift) \
+	((uint8) (((key) >> (shift)) & RT_CHUNK_MASK))
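+
+/*
+ * For example, with RT_NODE_SPAN = 8 the key 0x0000000000A1B2C3 is stored as
+ * chunk 0xA1 in the node at shift 16, chunk 0xB2 at shift 8, and chunk 0xC3
+ * in the leaf node at shift 0.
+ */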
+
+/*
+ * Mapping from a value (slot number or chunk) to the byte and bit in the
+ * is-set bitmap of node-128 and node-256.
+ */
+#define RT_NODE_BITMAP_BYTE(v) ((v) / BITS_PER_BYTE)
+#define RT_NODE_BITMAP_BIT(v) (UINT64CONST(1) << ((v) % RT_NODE_SPAN))
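+
+/*
+ * For example, value 0x2A (42) maps to byte 5 of the isset array and to bit
+ * mask 0x04 (bit 2) within that byte.
+ */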
+
+/* Enum used by rt_node_search() */
+typedef enum
+{
+	RT_ACTION_FIND = 0,		/* find the key-value */
+	RT_ACTION_DELETE,			/* delete the key-value */
+} rt_action;
+
+/*
+ * Supported radix tree nodes.
+ *
+ * XXX: These are currently not well chosen. To reduce memory fragmentation
+ * smaller classes should ideally fit neatly into the next larger class
+ * (except perhaps at the lowest end). Right now it's
+ * 48 -> 152 -> 296 -> 1304 -> 2088 bytes for inner/leaf nodes, leading to
+ * large amounts of allocator padding with aset.c. Hence the use of slab.
+ *
+ * XXX: need to explain why we choose these node types based on benchmark
+ * results etc.
+ */
+typedef enum rt_node_kind
+{
+	RT_NODE_KIND_4 = 0,
+	RT_NODE_KIND_16,
+	RT_NODE_KIND_32,
+	RT_NODE_KIND_128,
+	RT_NODE_KIND_256
+} rt_node_kind;
+#define RT_NODE_KIND_COUNT 5
+
+/*
+ * Base type for all node types.
+ */
+typedef struct rt_node
+{
+	/*
+	 * Number of children.  We use uint16 to be able to indicate 256 children
+	 * with a fanout of 8.
+	 */
+	uint16		count;
+
+	/*
+	 * Shift indicates which part of the key space is represented by this
+	 * node. That is, the key is shifted by 'shift' and the lowest
+	 * RT_NODE_SPAN bits are then represented in chunk.
+	 */
+	uint8		shift;
+	uint8		chunk;
+
+	/* Size class of the node */
+	rt_node_kind kind;
+} rt_node;
+
+/* Macros for radix tree nodes */
+#define IS_LEAF_NODE(n) (((rt_node *) (n))->shift == 0)
+#define IS_EMPTY_NODE(n) (((rt_node *) (n))->count == 0)
+#define NODE_HAS_FREE_SLOT(n) \
+	(((rt_node *) (n))->count < rt_node_info[((rt_node *) (n))->kind].max_slots)
+
+/*
+ * To reduce memory usage compared to a simple radix tree with a fixed
+ * fanout, we use adaptive node sizes, with different storage methods
+ * for different numbers of elements.
+ */
+typedef struct rt_node_4
+{
+	rt_node n;
+
+	/* 4 children, for key chunks */
+	uint8		chunks[4];
+	Datum		slots[4];
+} rt_node_4;
+
+typedef struct rt_node_16
+{
+	rt_node n;
+
+	/* 16 children, for key chunks */
+	uint8		chunks[16];
+	Datum		slots[16];
+} rt_node_16;
+
+typedef struct rt_node_32
+{
+	rt_node n;
+
+	/* 32 children, for key chunks */
+	uint8		chunks[32];
+	Datum		slots[32];
+} rt_node_32;
+
+#define RT_NODE_128_INVALID_IDX	0xFF
+typedef struct rt_node_128
+{
+	rt_node n;
+
+	/* Index into the slots[] array for each possible chunk value */
+	uint8		slot_idxs[RT_NODE_MAX_SLOTS];
+
+	/*
+	 * Slots for 128 children.
+	 *
+	 * Since the rt_node_xxx node is used by both inner and leaf nodes,
+	 * we need to distinguish between a null pointer in inner nodes and
+	 * a (Datum) 0 value in leaf nodes.  isset is a bitmap tracking which
+	 * slots are in use.
+	 */
+	Datum		slots[128];
+	uint8		isset[RT_NODE_NSLOTS_BITS(128)];
+} rt_node_128;
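+
+/*
+ * For example, if chunk 0x07 is the third chunk ever inserted into a node-128
+ * (and no chunk has been deleted), slot_idxs[0x07] would be 2, slots[2] would
+ * hold its value, and bit 2 of isset[0] would be set.
+ */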
+
+typedef struct rt_node_256
+{
+	rt_node n;
+
+	/*
+	 * Slots for 256 children.  isset is a bitmap tracking which slots
+	 * are in use.
+	 */
+	Datum		slots[RT_NODE_MAX_SLOTS];
+	uint8		isset[RT_NODE_NSLOTS_BITS(RT_NODE_MAX_SLOTS)];
+} rt_node_256;
+
+/* Information for each size class */
+typedef struct rt_node_info_elem
+{
+	const char *name;
+	int			max_slots;
+	Size		size;
+} rt_node_info_elem;
+
+static rt_node_info_elem rt_node_info[] =
+{
+	{"radix tree node 4", 4, sizeof(rt_node_4)},
+	{"radix tree node 16", 16, sizeof(rt_node_16)},
+	{"radix tree node 32", 32, sizeof(rt_node_32)},
+	{"radix tree node 128", 128, sizeof(rt_node_128)},
+	{"radix tree node 256", 256, sizeof(rt_node_256)},
+};
+
+/*
+ * The data structure for stacking the radix tree nodes.
+ *
+ * While deleting a key-value pair, we descend the radix tree, pushing the
+ * inner nodes we visit onto this stack. The stack can be freed with
+ * rt_free_stack.
+ */
+typedef struct rt_stack_data
+{
+	rt_node *node;
+	struct rt_stack_data *parent;
+} rt_stack_data;
+typedef rt_stack_data *rt_stack;
+
+/*
+ * Iteration support.
+ *
+ * Iterating over the radix tree returns each key-value pair in ascending
+ * order of the key. To support this, we iterate over the nodes of each level.
+ * The rt_iter_node_data struct tracks the iteration within a single node.
+ * rt_iter has an array of these structs, the stack, to track the iteration at
+ * every level. During the iteration, we also construct the key to return. The
+ * key is updated whenever we update the node iteration information, e.g., when
+ * advancing the current index within a node or when moving to the next node
+ * at the same level.
+ */
+typedef struct rt_iter_node_data
+{
+	rt_node *node;		/* current node being iterated */
+	int			current_idx;	/* current position. -1 for initial value */
+} rt_iter_node_data;
+
+struct rt_iter
+{
+	radix_tree *tree;
+
+	/* Track the iteration on nodes of each level */
+	rt_iter_node_data stack[RT_MAX_LEVEL];
+	int			stack_len;
+
+	/* The key being constructed during the iteration */
+	uint64		key;
+};
+
+/* A radix tree with nodes */
+struct radix_tree
+{
+	MemoryContext context;
+
+	rt_node		*root;
+	uint64		max_val;
+	uint64		num_keys;
+	MemoryContextData *slabs[RT_NODE_KIND_COUNT];
+
+	/* statistics */
+	uint64		mem_used;
+	int32		cnt[RT_NODE_KIND_COUNT];
+};
+
+static rt_node *rt_node_grow(radix_tree *tree, rt_node *parent,
+											 rt_node *node, uint64 key);
+static bool rt_node_find_child(rt_node *node, rt_node **child_p, uint64 key);
+static bool rt_node_search(rt_node *node, Datum **slot_p, uint64 key,
+								   rt_action action);
+static void rt_extend(radix_tree *tree, uint64 key);
+static void rt_new_root(radix_tree *tree, uint64 key, Datum val);
+static rt_node *rt_node_insert_child(radix_tree *tree,
+													 rt_node *parent,
+													 rt_node *node,
+													 uint64 key);
+static void rt_node_insert_val(radix_tree *tree, rt_node *parent,
+									   rt_node *node, uint64 key, Datum val,
+									   bool *replaced_p);
+static inline void rt_iter_update_key(rt_iter *iter, uint8 chunk, uint8 shift);
+static Datum rt_node_iterate_next(rt_iter *iter, rt_iter_node_data *node_iter,
+										  bool *found_p);
+static void rt_store_iter_node(rt_iter *iter, rt_iter_node_data *node_iter,
+									   rt_node *node);
+static void rt_update_iter_stack(rt_iter *iter, int from);
+static void rt_verify_node(rt_node *node);
+
+/*
+ * Helper functions for accessing each kind of node.
+ */
+
+static inline int
+node_16_search_eq(rt_node_16 *node, uint8 chunk)
+{
+/*
+ * On Windows, even if we use SSE intrinsics, pg_rightmost_one_pos32 is slow.
+ * So we guard with HAVE__BUILTIN_CTZ as well.
+ *
+ * XXX: once we have the correct interfaces to pg_bitutils.h for Windows
+ * we can remove the HAVE__BUILTIN_CTZ condition.
+ */
+#if defined(__SSE2__) && defined(HAVE__BUILTIN_CTZ)
+	__m128i	key_v = _mm_set1_epi8(chunk);
+	__m128i data_v = _mm_loadu_si128((__m128i_u *) node->chunks);
+	__m128i cmp_v = _mm_cmpeq_epi8(key_v, data_v);
+	uint32	bitfield = _mm_movemask_epi8(cmp_v);
+
+	bitfield &= ((1 << node->n.count) - 1);
+
+	return bitfield ? pg_rightmost_one_pos32(bitfield) : -1;
+#else
+	for (int i = 0; i < node->n.count; i++)
+	{
+		if (node->chunks[i] > chunk)
+			return -1;
+
+		if (node->chunks[i] == chunk)
+			return i;
+	}
+
+	return -1;
+#endif
+}
+
+/*
+ * This is a bit more complicated than node_16_search_eq(), because
+ * until recently no unsigned uint8 comparison instruction existed on x86. So
+ * we need to play some trickery using _mm_min_epu8() to effectively get
+ * <=. There never will be any equal elements in the current uses, but that's
+ * what we get here...
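+ *
+ * For example (purely illustrative values), searching for chunk 5 in
+ * chunks[] = {2, 4, 7, 9}: min(data, key) yields {2, 4, 5, 5}; comparing that
+ * against the broadcast key flags positions 2 and 3, and the rightmost set
+ * bit (position 2) is the index of the first element >= 5.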
+ */
+static inline int
+node_16_search_le(rt_node_16 *node, uint8 chunk)
+{
+#if defined(__SSE2__) && defined(HAVE__BUILTIN_CTZ)
+	__m128i key_v = _mm_set1_epi8(chunk);
+	__m128i data_v = _mm_loadu_si128((__m128i_u *) node->chunks);
+	__m128i min_v = _mm_min_epu8(data_v, key_v);
+	__m128i cmp_v = _mm_cmpeq_epi8(key_v, min_v);
+	uint32	bitfield = _mm_movemask_epi8(cmp_v);
+
+	bitfield &= ((1 << node->n.count) - 1);
+
+	return (bitfield) ? pg_rightmost_one_pos32(bitfield) : node->n.count;
+#else
+	int			index;
+
+	for (index = 0; index < node->n.count; index++)
+	{
+		if (node->chunks[index] >= chunk)
+			break;
+	}
+
+	return index;
+#endif
+}
+
+static inline int
+node_32_search_eq(rt_node_32 *node, uint8 chunk)
+{
+#if defined(__SSE2__) && defined(HAVE__BUILTIN_CTZ)
+	int	index = 0;
+	__m128i key_v = _mm_set1_epi8(chunk);
+
+	while (index < node->n.count)
+	{
+		__m128i data_v = _mm_loadu_si128((__m128i_u *) &(node->chunks[index]));
+		__m128i cmp_v = _mm_cmpeq_epi8(key_v, data_v);
+		uint32	bitfield = _mm_movemask_epi8(cmp_v);
+
+		bitfield &= ((UINT64CONST(1) << (node->n.count - index)) - 1);
+
+		if (bitfield)
+		{
+			index += pg_rightmost_one_pos32(bitfield);
+			break;
+		}
+
+		index += 16;
+	}
+
+	return (index < node->n.count) ? index : -1;
+#else
+	for (int i = 0; i < node->n.count; i++)
+	{
+		if (node->chunks[i] > chunk)
+			return -1;
+
+		if (node->chunks[i] == chunk)
+			return i;
+	}
+
+	return -1;
+#endif
+}
+
+/*
+ * Similar to node_16_search_le(), we need to play some trickery using
+ * _mm_min_epu8() to effectively get <=. There never will be any equal elements
+ * in the current uses, but that's what we get here...
+ */
+static inline int
+node_32_search_le(rt_node_32 *node, uint8 chunk)
+{
+#if defined(__SSE2__) && defined(HAVE__BUILTIN_CTZ)
+	int index = 0;
+	bool found = false;
+	__m128i key_v = _mm_set1_epi8(chunk);
+
+	while (index < node->n.count)
+	{
+		__m128i data_v = _mm_loadu_si128((__m128i_u *) &(node->chunks[index]));
+		__m128i min_v = _mm_min_epu8(data_v, key_v);
+		__m128i cmp_v = _mm_cmpeq_epi8(key_v, min_v);
+		uint32	bitfield = _mm_movemask_epi8(cmp_v);
+
+		bitfield &= ((UINT64CONST(1) << (node->n.count - index)) - 1);
+
+		if (bitfield)
+		{
+			index += pg_rightmost_one_pos32(bitfield);
+			found = true;
+			break;
+		}
+
+		index += 16;
+	}
+
+	return found ? index : node->n.count;
+#else
+	int			index;
+
+	for (index = 0; index < node->n.count; index++)
+	{
+		if (node->chunks[index] >= chunk)
+			break;
+	}
+
+	return index;
+#endif
+}
+
+/* Does the given chunk in the node have a value? */
+static inline bool
+node_128_is_chunk_used(rt_node_128 *node, uint8 chunk)
+{
+	return node->slot_idxs[chunk] != RT_NODE_128_INVALID_IDX;
+}
+
+/* Is the slot in the node used? */
+static inline bool
+node_128_is_slot_used(rt_node_128 *node, uint8 slot)
+{
+	return ((node->isset[RT_NODE_BITMAP_BYTE(slot)] & RT_NODE_BITMAP_BIT(slot)) != 0);
+}
+
+/* Set the slot at the corresponding chunk */
+static inline void
+node_128_set(rt_node_128 *node, uint8 chunk, Datum val)
+{
+	int		slotpos;
+
+	/*
+	 * Find an unused slot.  We scan the isset bitmap byte by byte, then
+	 * check each bit within the first byte that is not fully set.
+	 */
+	for (slotpos = 0; slotpos < RT_NODE_NSLOTS_BITS(128); slotpos++)
+	{
+		if (node->isset[slotpos] < 0xFF)
+			break;
+	}
+	Assert(slotpos < RT_NODE_NSLOTS_BITS(128));
+
+	slotpos *= BITS_PER_BYTE;
+	while (node_128_is_slot_used(node, slotpos))
+		slotpos++;
+
+	node->slot_idxs[chunk] = slotpos;
+	node->slots[slotpos] = val;
+	node->isset[RT_NODE_BITMAP_BYTE(slotpos)] |= RT_NODE_BITMAP_BIT(slotpos);
+}
+
+/* Delete the slot at the corresponding chunk */
+static inline void
+node_128_unset(rt_node_128 *node, uint8 chunk)
+{
+	int			slotpos = node->slot_idxs[chunk];
+
+	if (!node_128_is_chunk_used(node, chunk))
+		return;
+
+	node->isset[RT_NODE_BITMAP_BYTE(slotpos)] &= ~(RT_NODE_BITMAP_BIT(slotpos));
+	node->slot_idxs[chunk] = RT_NODE_128_INVALID_IDX;
+}
+
+/* Return the slot data corresponding to the chunk */
+static inline Datum
+node_128_get_chunk_slot(rt_node_128 *node, uint8 chunk)
+{
+	return node->slots[node->slot_idxs[chunk]];
+}
+
+/* Return true if the slot corresponding to the given chunk is in use */
+static inline bool
+node_256_is_chunk_used(rt_node_256 *node, uint8 chunk)
+{
+	return (node->isset[RT_NODE_BITMAP_BYTE(chunk)] & RT_NODE_BITMAP_BIT(chunk)) != 0;
+}
+
+/* Set the slot at the given chunk position */
+static inline void
+node_256_set(rt_node_256 *node, uint8 chunk, Datum slot)
+{
+	node->slots[chunk] = slot;
+	node->isset[RT_NODE_BITMAP_BYTE(chunk)] |= RT_NODE_BITMAP_BIT(chunk);
+}
+
+/* Unset the slot at the given chunk position */
+static inline void
+node_256_unset(rt_node_256 *node, uint8 chunk)
+{
+	node->isset[RT_NODE_BITMAP_BYTE(chunk)] &= ~(RT_NODE_BITMAP_BIT(chunk));
+}
+
+/*
+ * Return the shift of the topmost node needed to store the given key.
+ */
+inline static int
+key_get_shift(uint64 key)
+{
+	return (key == 0)
+		? 0
+		: (pg_leftmost_one_pos64(key) / RT_NODE_SPAN) * RT_NODE_SPAN;
+}
+
+/*
+ * Return the maximum value that can be stored in the subtree rooted at a
+ * node with the given shift.
+ */
+static uint64
+shift_get_max_val(int shift)
+{
+	if (shift == RT_MAX_SHIFT)
+		return UINT64_MAX;
+
+	return (UINT64CONST(1) << (shift + RT_NODE_SPAN)) - 1;
+}
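+
+/*
+ * For example, key_get_shift(0xFFFF) is 8, since the highest set bit falls in
+ * the second-lowest byte, and shift_get_max_val(8) is 0xFFFF: a subtree
+ * rooted at shift 8 covers two bytes' worth of key space.
+ */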
+
+/*
+ * Allocate a new node with the given node kind.
+ */
+static rt_node *
+rt_alloc_node(radix_tree *tree, rt_node_kind kind)
+{
+	rt_node *newnode;
+
+	newnode = (rt_node *) MemoryContextAllocZero(tree->slabs[kind],
+												 rt_node_info[kind].size);
+	newnode->kind = kind;
+
+	/* Initialize slot_idxs to invalid values */
+	if (kind == RT_NODE_KIND_128)
+	{
+		rt_node_128 *n128 = (rt_node_128 *) newnode;
+
+		memset(&(n128->slot_idxs), RT_NODE_128_INVALID_IDX,
+			   sizeof(n128->slot_idxs));
+	}
+
+	/* update the statistics */
+	tree->mem_used += GetMemoryChunkSpace(newnode);
+	tree->cnt[kind]++;
+
+	return newnode;
+}
+
+/* Free the given node */
+static void
+rt_free_node(radix_tree *tree, rt_node *node)
+{
+	/* If we're deleting the root node, make the tree empty */
+	if (tree->root == node)
+		tree->root = NULL;
+
+	/* update the statistics */
+	tree->mem_used -= GetMemoryChunkSpace(node);
+	tree->cnt[node->kind]--;
+
+	Assert(tree->mem_used >= 0);
+	Assert(tree->cnt[node->kind] >= 0);
+
+	pfree(node);
+}
+
+/* Free a stack made by rt_delete */
+static void
+rt_free_stack(rt_stack stack)
+{
+	rt_stack ostack;
+
+	while (stack != NULL)
+	{
+		ostack = stack;
+		stack = stack->parent;
+		pfree(ostack);
+	}
+}
+
+/* Copy the common fields without the kind */
+static void
+rt_copy_node_common(rt_node *src, rt_node *dst)
+{
+	dst->shift = src->shift;
+	dst->chunk = src->chunk;
+	dst->count = src->count;
+}
+
+/*
+ * The radix tree doesn't have sufficient height. Extend the radix tree so it
+ * can store the key.
+ */
+static void
+rt_extend(radix_tree *tree, uint64 key)
+{
+	int			target_shift;
+	int			shift = tree->root->shift + RT_NODE_SPAN;
+
+	target_shift = key_get_shift(key);
+
+	/* Grow tree from 'shift' to 'target_shift' */
+	while (shift <= target_shift)
+	{
+		rt_node_4 *node =
+		(rt_node_4 *) rt_alloc_node(tree, RT_NODE_KIND_4);
+
+		node->n.count = 1;
+		node->n.shift = shift;
+		node->chunks[0] = 0;
+		node->slots[0] = PointerGetDatum(tree->root);
+
+		tree->root->chunk = 0;
+		tree->root = (rt_node *) node;
+
+		shift += RT_NODE_SPAN;
+	}
+
+	tree->max_val = shift_get_max_val(target_shift);
+}
+
+/*
+ * Wrapper for rt_node_search to search the pointer to the child node in the
+ * node.
+ *
+ * Return true if the corresponding child is found, otherwise return false.
+ * On success, sets *child_p to the child node.
+ */
+static bool
+rt_node_find_child(rt_node *node, rt_node **child_p, uint64 key)
+{
+	bool		found = false;
+	Datum	   *slot_ptr;
+
+	if (rt_node_search(node, &slot_ptr, key, RT_ACTION_FIND))
+	{
+		/* Found the pointer to the child node */
+		found = true;
+		*child_p = (rt_node *) DatumGetPointer(*slot_ptr);
+	}
+
+	return found;
+}
+
+/*
+ * Return true if the corresponding slot is used, otherwise return false.
+ * On success with RT_ACTION_FIND, sets *slot_p to point to the slot.
+ */
+static bool
+rt_node_search(rt_node *node, Datum **slot_p, uint64 key,
+					   rt_action action)
+{
+	int			chunk = RT_GET_KEY_CHUNK(key, node->shift);
+	bool		found = false;
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node;
+
+				/* Do linear search */
+				for (int i = 0; i < n4->n.count; i++)
+				{
+					if (n4->chunks[i] > chunk)
+						break;
+
+					/*
+					 * If we find the chunk in the node, do the specified
+					 * action
+					 */
+					if (n4->chunks[i] == chunk)
+					{
+						if (action == RT_ACTION_FIND)
+							*slot_p = &(n4->slots[i]);
+						else	/* RT_ACTION_DELETE */
+						{
+							memmove(&(n4->chunks[i]), &(n4->chunks[i + 1]),
+									sizeof(uint8) * (n4->n.count - i - 1));
+							memmove(&(n4->slots[i]), &(n4->slots[i + 1]),
+									sizeof(rt_node *) * (n4->n.count - i - 1));
+						}
+
+						found = true;
+						break;
+					}
+				}
+
+				break;
+			}
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+				int			idx;
+
+				/* Search by SIMD instructions */
+				idx = node_16_search_eq(n16, chunk);
+
+				/* If we find the chunk in the node, do the specified action */
+				if (idx >= 0)
+				{
+					if (action == RT_ACTION_FIND)
+						*slot_p = &(n16->slots[idx]);
+					else		/* RT_ACTION_DELETE */
+					{
+						memmove(&(n16->chunks[idx]), &(n16->chunks[idx + 1]),
+								sizeof(uint8) * (n16->n.count - idx - 1));
+						memmove(&(n16->slots[idx]), &(n16->slots[idx + 1]),
+								sizeof(rt_node *) * (n16->n.count - idx - 1));
+					}
+
+					found = true;
+				}
+
+				break;
+			}
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+				int			idx;
+
+				/* Search by SIMD instructions */
+				idx = node_32_search_eq(n32, chunk);
+
+				/* If we find the chunk in the node, do the specified action */
+				if (idx >= 0)
+				{
+					if (action == RT_ACTION_FIND)
+						*slot_p = &(n32->slots[idx]);
+					else		/* RT_ACTION_DELETE */
+					{
+						memmove(&(n32->chunks[idx]), &(n32->chunks[idx + 1]),
+								sizeof(uint8) * (n32->n.count - idx - 1));
+						memmove(&(n32->slots[idx]), &(n32->slots[idx + 1]),
+								sizeof(rt_node *) * (n32->n.count - idx - 1));
+					}
+
+					found = true;
+				}
+
+				break;
+			}
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+
+				/* If we find the chunk in the node, do the specified action */
+				if (node_128_is_chunk_used(n128, chunk))
+				{
+					if (action == RT_ACTION_FIND)
+						*slot_p = &(n128->slots[n128->slot_idxs[chunk]]);
+					else		/* RT_ACTION_DELETE */
+						node_128_unset(n128, chunk);
+
+					found = true;
+				}
+
+				break;
+			}
+		case RT_NODE_KIND_256:
+			{
+				rt_node_256 *n256 = (rt_node_256 *) node;
+
+				/* If we find the chunk in the node, do the specified action */
+				if (node_256_is_chunk_used(n256, chunk))
+				{
+					if (action == RT_ACTION_FIND)
+						*slot_p = &(n256->slots[chunk]);
+					else		/* RT_ACTION_DELETE */
+						node_256_unset(n256, chunk);
+
+					found = true;
+				}
+
+				break;
+			}
+	}
+
+	/* Update the statistics */
+	if (action == RT_ACTION_DELETE && found)
+		node->count--;
+
+	return found;
+}
+
+/*
+ * Create a new node as the root. Subordinate nodes will be created during
+ * the insertion.
+ */
+static void
+rt_new_root(radix_tree *tree, uint64 key, Datum val)
+{
+	rt_node_4 *n4 =
+	(rt_node_4 *) rt_alloc_node(tree, RT_NODE_KIND_4);
+	int			shift = key_get_shift(key);
+
+	n4->n.shift = shift;
+	tree->max_val = shift_get_max_val(shift);
+	tree->root = (rt_node *) n4;
+}
+
+/* Insert 'node' as a child node of 'parent' */
+static rt_node *
+rt_node_insert_child(radix_tree *tree, rt_node *parent,
+					 rt_node *node, uint64 key)
+{
+	rt_node *newchild =
+	(rt_node *) rt_alloc_node(tree, RT_NODE_KIND_4);
+
+	Assert(!IS_LEAF_NODE(node));
+
+	newchild->shift = node->shift - RT_NODE_SPAN;
+	newchild->chunk = RT_GET_KEY_CHUNK(key, node->shift);
+
+	rt_node_insert_val(tree, parent, node, key, PointerGetDatum(newchild), NULL);
+
+	return (rt_node *) newchild;
+}
+
+/*
+ * Insert the value into the node. The node grows if it's full.
+ */
+static void
+rt_node_insert_val(radix_tree *tree, rt_node *parent,
+						   rt_node *node, uint64 key, Datum val,
+						   bool *replaced_p)
+{
+	int			chunk = RT_GET_KEY_CHUNK(key, node->shift);
+	bool		replaced = false;
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node;
+				int			idx;
+
+				for (idx = 0; idx < n4->n.count; idx++)
+				{
+					if (n4->chunks[idx] >= chunk)
+						break;
+				}
+
+				if (NODE_HAS_FREE_SLOT(n4))
+				{
+					if (n4->n.count == 0)
+					{
+						/* the first key for this node, add it */
+					}
+					else if (idx < n4->n.count && n4->chunks[idx] == chunk)
+					{
+						/* found the key, replace it */
+						replaced = true;
+					}
+					else if (idx != n4->n.count)
+					{
+						/*
+						 * the key needs to be inserted in the middle of the
+						 * array, make space for the new key.
+						 */
+						memmove(&(n4->chunks[idx + 1]), &(n4->chunks[idx]),
+								sizeof(uint8) * (n4->n.count - idx));
+						memmove(&(n4->slots[idx + 1]), &(n4->slots[idx]),
+								sizeof(Datum) * (n4->n.count - idx));
+					}
+
+					n4->chunks[idx] = chunk;
+					n4->slots[idx] = val;
+
+					/* Done */
+					break;
+				}
+
+				/* The node doesn't have a free slot, so it needs to grow */
+				node = rt_node_grow(tree, parent, node, key);
+				Assert(node->kind == RT_NODE_KIND_16);
+			}
+			/* FALLTHROUGH */
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+				int			idx;
+
+				idx = node_16_search_le(n16, chunk);
+
+				if (NODE_HAS_FREE_SLOT(n16))
+				{
+					if (n16->n.count == 0)
+					{
+						/* first key for this node, add it */
+					}
+					else if (idx < n16->n.count && n16->chunks[idx] == chunk)
+					{
+						/* found the key, replace it */
+						replaced = true;
+					}
+					else if (idx != n16->n.count)
+					{
+						/*
+						 * the key needs to be inserted in the middle of the
+						 * array, make space for the new key.
+						 */
+						memmove(&(n16->chunks[idx + 1]), &(n16->chunks[idx]),
+								sizeof(uint8) * (n16->n.count - idx));
+						memmove(&(n16->slots[idx + 1]), &(n16->slots[idx]),
+								sizeof(Datum) * (n16->n.count - idx));
+					}
+
+					n16->chunks[idx] = chunk;
+					n16->slots[idx] = val;
+
+					/* Done */
+					break;
+				}
+
+				/* The node doesn't have a free slot, so it needs to grow */
+				node = rt_node_grow(tree, parent, node, key);
+				Assert(node->kind == RT_NODE_KIND_32);
+			}
+			/* FALLTHROUGH */
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+				int			idx;
+
+				idx = node_32_search_le(n32, chunk);
+
+				if (NODE_HAS_FREE_SLOT(n32))
+				{
+					if (n32->n.count == 0)
+					{
+						/* first key for this node, add it */
+					}
+					else if (idx < n32->n.count && n32->chunks[idx] == chunk)
+					{
+						/* found the key, replace it */
+						replaced = true;
+					}
+					else if (idx != n32->n.count)
+					{
+						/*
+						 * the key needs to be inserted in the middle of the
+						 * array, make space for the new key.
+						 */
+						memmove(&(n32->chunks[idx + 1]), &(n32->chunks[idx]),
+								sizeof(uint8) * (n32->n.count - idx));
+						memmove(&(n32->slots[idx + 1]), &(n32->slots[idx]),
+								sizeof(Datum) * (n32->n.count - idx));
+					}
+
+					n32->chunks[idx] = chunk;
+					n32->slots[idx] = val;
+
+					/* Done */
+					break;
+				}
+
+				/* The node doesn't have a free slot, so it needs to grow */
+				node = rt_node_grow(tree, parent, node, key);
+				Assert(node->kind == RT_NODE_KIND_128);
+			}
+			/* FALLTHROUGH */
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+
+				if (node_128_is_chunk_used(n128, chunk))
+				{
+					/* found the existing value */
+					node_128_set(n128, chunk, val);
+					replaced = true;
+					break;
+				}
+
+				if (NODE_HAS_FREE_SLOT(n128))
+				{
+					node_128_set(n128, chunk, val);
+
+					/* Done */
+					break;
+				}
+
+				/* The node doesn't have a free slot, so it needs to grow */
+				node = rt_node_grow(tree, parent, node, key);
+				Assert(node->kind == RT_NODE_KIND_256);
+			}
+			/* FALLTHROUGH */
+		case RT_NODE_KIND_256:
+			{
+				rt_node_256 *n256 = (rt_node_256 *) node;
+
+				if (node_256_is_chunk_used(n256, chunk))
+					replaced = true;
+
+				node_256_set(n256, chunk, val);
+
+				break;
+			}
+	}
+
+	/* Update statistics */
+	if (!replaced)
+		node->count++;
+
+	if (replaced_p)
+		*replaced_p = replaced;
+
+	/*
+	 * Done. Finally, verify that the chunk and value have been inserted or
+	 * replaced properly in the node.
+	 */
+	rt_verify_node(node);
+}
+
+/* Change the node type to the next larger one */
+static rt_node *
+rt_node_grow(radix_tree *tree, rt_node *parent, rt_node *node,
+					 uint64 key)
+{
+	rt_node *newnode = NULL;
+
+	Assert(node->count == rt_node_info[node->kind].max_slots);
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node;
+				rt_node_16 *new16 =
+				(rt_node_16 *) rt_alloc_node(tree, RT_NODE_KIND_16);
+
+				rt_copy_node_common((rt_node *) n4,
+											(rt_node *) new16);
+
+				/* Copy both chunks and slots to the new node */
+				memcpy(&(new16->chunks), &(n4->chunks), sizeof(uint8) * 4);
+				memcpy(&(new16->slots), &(n4->slots), sizeof(Datum) * 4);
+
+				newnode = (rt_node *) new16;
+				break;
+			}
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+				rt_node_32 *new32 =
+				(rt_node_32 *) rt_alloc_node(tree, RT_NODE_KIND_32);
+
+				rt_copy_node_common((rt_node *) n16,
+											(rt_node *) new32);
+
+				/* Copy both chunks and slots to the new node */
+				memcpy(&(new32->chunks), &(n16->chunks), sizeof(uint8) * 16);
+				memcpy(&(new32->slots), &(n16->slots), sizeof(Datum) * 16);
+
+				newnode = (rt_node *) new32;
+				break;
+			}
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+				rt_node_128 *new128 =
+				(rt_node_128 *) rt_alloc_node(tree, RT_NODE_KIND_128);
+
+				/* Copy both chunks and slots to the new node */
+				rt_copy_node_common((rt_node *) n32,
+											(rt_node *) new128);
+
+				for (int i = 0; i < n32->n.count; i++)
+					node_128_set(new128, n32->chunks[i], n32->slots[i]);
+
+				newnode = (rt_node *) new128;
+				break;
+			}
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+				rt_node_256 *new256 =
+				(rt_node_256 *) rt_alloc_node(tree, RT_NODE_KIND_256);
+				int			cnt = 0;
+
+				rt_copy_node_common((rt_node *) n128,
+											(rt_node *) new256);
+
+				for (int i = 0; i < RT_NODE_MAX_SLOTS && cnt < n128->n.count; i++)
+				{
+					if (!node_128_is_chunk_used(n128, i))
+						continue;
+
+					node_256_set(new256, i, node_128_get_chunk_slot(n128, i));
+					cnt++;
+				}
+
+				newnode = (rt_node *) new256;
+				break;
+			}
+		case RT_NODE_KIND_256:
+			elog(ERROR, "radix tree node-256 cannot grow");
+			break;
+	}
+
+	if (parent == node)
+	{
+		/* Replace the root node with the new large node */
+		tree->root = newnode;
+	}
+	else
+	{
+		Datum	   *slot_ptr = NULL;
+
+		/* Update the parent's slot to point to the new node */
+		rt_node_search(parent, &slot_ptr, key, RT_ACTION_FIND);
+		Assert(*slot_ptr);
+		*slot_ptr = PointerGetDatum(newnode);
+	}
+
+	/* Verify the node has grown properly */
+	rt_verify_node(newnode);
+
+	/* Free the old node */
+	rt_free_node(tree, node);
+
+	return newnode;
+}
+
+/*
+ * Create the radix tree in the given memory context and return it.
+ */
+radix_tree *
+rt_create(MemoryContext ctx)
+{
+	radix_tree *tree;
+	MemoryContext old_ctx;
+
+	old_ctx = MemoryContextSwitchTo(ctx);
+
+	tree = palloc(sizeof(radix_tree));
+	tree->context = ctx;
+	tree->root = NULL;
+	tree->max_val = 0;
+	tree->num_keys = 0;
+	tree->mem_used = 0;
+
+	/* Create the slab allocator for each size class */
+	for (int i = 0; i < RT_NODE_KIND_COUNT; i++)
+	{
+		tree->slabs[i] = SlabContextCreate(ctx,
+										   rt_node_info[i].name,
+										   SLAB_DEFAULT_BLOCK_SIZE,
+										   rt_node_info[i].size);
+		tree->cnt[i] = 0;
+	}
+
+	MemoryContextSwitchTo(old_ctx);
+
+	return tree;
+}
+
+/*
+ * Free the given radix tree.
+ */
+void
+rt_free(radix_tree *tree)
+{
+	for (int i = 0; i < RT_NODE_KIND_COUNT; i++)
+		MemoryContextDelete(tree->slabs[i]);
+
+	pfree(tree);
+}
+
+/*
+ * Insert the key with the val.
+ *
+ * If found_p is not NULL, it is set to true if the key was already present,
+ * otherwise to false.
+ *
+ * XXX: do we need to support update_if_exists behavior?
+ */
+void
+rt_insert(radix_tree *tree, uint64 key, Datum val, bool *found_p)
+{
+	int			shift;
+	bool		replaced;
+	rt_node *node;
+	rt_node *parent = tree->root;
+
+	/* Empty tree, create the root */
+	if (!tree->root)
+		rt_new_root(tree, key, val);
+
+	/* Extend the tree if necessary */
+	if (key > tree->max_val)
+		rt_extend(tree, key);
+
+	Assert(tree->root);
+
+	shift = tree->root->shift;
+	node = tree->root;
+	while (shift > 0)
+	{
+		rt_node *child;
+
+		if (!rt_node_find_child(node, &child, key))
+			child = rt_node_insert_child(tree, parent, node, key);
+
+		Assert(child != NULL);
+
+		parent = node;
+		node = child;
+		shift -= RT_NODE_SPAN;
+	}
+
+	/* arrived at a leaf */
+	Assert(IS_LEAF_NODE(node));
+
+	rt_node_insert_val(tree, parent, node, key, val, &replaced);
+
+	/* Update the statistics */
+	if (!replaced)
+		tree->num_keys++;
+
+	if (found_p)
+		*found_p = replaced;
+}
+
+/*
+ * Search the given key in the radix tree. Return true if the key is successfully
+ * found, otherwise return false.  On success, the value is set to *val_p, so
+ * val_p must not be NULL.
+ */
+bool
+rt_search(radix_tree *tree, uint64 key, Datum *val_p)
+{
+	rt_node *node;
+	Datum	   *value_ptr;
+	int			shift;
+
+	Assert(val_p);
+
+	if (!tree->root || key > tree->max_val)
+		return false;
+
+	node = tree->root;
+	shift = tree->root->shift;
+	while (shift > 0)
+	{
+		rt_node *child;
+
+		if (!rt_node_find_child(node, &child, key))
+			return false;
+
+		node = child;
+		shift -= RT_NODE_SPAN;
+	}
+
+	/* We reached a leaf node, search the corresponding slot */
+	Assert(IS_LEAF_NODE(node));
+
+	if (!rt_node_search(node, &value_ptr, key, RT_ACTION_FIND))
+		return false;
+
+	/* Found, set the value to return */
+	*val_p = *value_ptr;
+	return true;
+}
+
+/*
+ * Delete the given key from the radix tree. Return true if the key is found (and
+ * deleted), otherwise do nothing and return false.
+ */
+bool
+rt_delete(radix_tree *tree, uint64 key)
+{
+	rt_node *node;
+	int			shift;
+	rt_stack stack = NULL;
+	bool		deleted;
+
+	if (!tree->root || key > tree->max_val)
+		return false;
+
+	/*
+	 * Descend the tree searching for the key, while building a stack of the
+	 * nodes we visit.
+	 */
+	node = tree->root;
+	shift = tree->root->shift;
+	while (shift >= 0)
+	{
+		rt_node *child;
+		rt_stack new_stack;
+
+		new_stack = (rt_stack) palloc(sizeof(rt_stack_data));
+		new_stack->node = node;
+		new_stack->parent = stack;
+		stack = new_stack;
+
+		if (IS_LEAF_NODE(node))
+			break;
+
+		if (!rt_node_find_child(node, &child, key))
+		{
+			rt_free_stack(stack);
+			return false;
+		}
+
+		node = child;
+		shift -= RT_NODE_SPAN;
+	}
+
+	/*
+	 * Delete the key from the leaf node and recursively delete internal nodes
+	 * if necessary.
+	 */
+	Assert(IS_LEAF_NODE(stack->node));
+	while (stack != NULL)
+	{
+		rt_node *node;
+		Datum	   *slot;
+
+		/* pop the node from the stack */
+		node = stack->node;
+		stack = stack->parent;
+
+		deleted = rt_node_search(node, &slot, key, RT_ACTION_DELETE);
+
+		/* If the node didn't become empty, we stop removing nodes */
+		if (!IS_EMPTY_NODE(node))
+			break;
+
+		Assert(deleted);
+
+		/* The node became empty */
+		rt_free_node(tree, node);
+
+		/*
+		 * If we eventually deleted the root node while recursively deleting
+		 * empty nodes, we make the tree empty.
+		 */
+		if (stack == NULL)
+		{
+			tree->root = NULL;
+			tree->max_val = 0;
+		}
+	}
+
+	if (deleted)
+		tree->num_keys--;
+
+	rt_free_stack(stack);
+	return deleted;
+}
+
+/* Create and return the iterator for the given radix tree */
+rt_iter *
+rt_begin_iterate(radix_tree *tree)
+{
+	MemoryContext old_ctx;
+	rt_iter *iter;
+	int			top_level;
+
+	old_ctx = MemoryContextSwitchTo(tree->context);
+
+	iter = (rt_iter *) palloc0(sizeof(rt_iter));
+	iter->tree = tree;
+
+	/* empty tree */
+	if (!iter->tree->root)
+	{
+		MemoryContextSwitchTo(old_ctx);
+		return iter;
+	}
+
+	top_level = iter->tree->root->shift / RT_NODE_SPAN;
+
+	iter->stack_len = top_level;
+	iter->stack[top_level].node = iter->tree->root;
+	iter->stack[top_level].current_idx = -1;
+
+	/*
+	 * Descend to the leftmost leaf node from the root. The key is
+	 * constructed while descending to the leaf.
+	 */
+	rt_update_iter_stack(iter, top_level);
+
+	MemoryContextSwitchTo(old_ctx);
+
+	return iter;
+}
+
+/*
+ * Update the stack of radix tree nodes while descending to the leftmost leaf
+ * from the 'from' level.
+ */
+static void
+rt_update_iter_stack(rt_iter *iter, int from)
+{
+	rt_node *node = iter->stack[from].node;
+	int			level = from;
+
+	for (;;)
+	{
+		rt_iter_node_data *node_iter = &(iter->stack[level--]);
+		bool		found;
+
+		/* Set the node to this level */
+		rt_store_iter_node(iter, node_iter, node);
+
+		/* Finish if we reached the leaf node */
+		if (IS_LEAF_NODE(node))
+			break;
+
+		/* Get the first slot in this node and descend into it */
+		node = (rt_node *)
+			DatumGetPointer(rt_node_iterate_next(iter, node_iter, &found));
+
+		/*
+		 * Since we always take the first slot in the node, we must have
+		 * found a slot.
+		 */
+		Assert(found);
+	}
+}
+
+/*
+ * Return true, setting *key_p and *value_p, if there is a next key.
+ * Otherwise, return false.
+ */
+bool
+rt_iterate_next(rt_iter *iter, uint64 *key_p, Datum *value_p)
+{
+	bool		found = false;
+	Datum		slot = (Datum) 0;
+
+	/* Empty tree */
+	if (!iter->tree->root)
+		return false;
+
+	for (;;)
+	{
+		rt_node *node;
+		rt_iter_node_data *node_iter;
+		int		level;
+
+		/*
+		 * Iterate over the node at each level from the bottom of the tree,
+		 * i.e., the leaf node, until we find the next slot.
+		 */
+		for (level = 0; level <= iter->stack_len; level++)
+		{
+			slot = rt_node_iterate_next(iter, &(iter->stack[level]), &found);
+
+			if (found)
+				break;
+		}
+
+		/* We could not find any new key-value pair, the iteration finished */
+		if (!found)
+			break;
+
+		/* found the next slot at the leaf node, return it */
+		if (level == 0)
+		{
+			*key_p = iter->key;
+			*value_p = slot;
+			break;
+		}
+
+		/*
+		 * We have advanced the slot in more than one node, including both the
+		 * leaf node and internal nodes. So we update the stack by descending
+		 * to the leftmost leaf node from this level.
+		 */
+		node = (rt_node *) DatumGetPointer(slot);
+		node_iter = &(iter->stack[level - 1]);
+		rt_store_iter_node(iter, node_iter, node);
+		rt_update_iter_stack(iter, level - 1);
+	}
+
+	return found;
+}
+
+void
+rt_end_iterate(rt_iter *iter)
+{
+	pfree(iter);
+}
+
+/*
+ * Iterate over the given radix tree node and return its next slot, setting
+ * *found_p to true, if there is one.  Otherwise, set *found_p to false.
+ */
+static Datum
+rt_node_iterate_next(rt_iter *iter, rt_iter_node_data *node_iter, bool *found_p)
+{
+	rt_node *node = node_iter->node;
+	Datum		slot = (Datum) 0;
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node_iter->node;
+
+				node_iter->current_idx++;
+
+				if (node_iter->current_idx >= n4->n.count)
+					goto not_found;
+
+				slot = n4->slots[node_iter->current_idx];
+
+				/* Update the part of the key with the current chunk */
+				if (IS_LEAF_NODE(n4))
+					rt_iter_update_key(iter, n4->chunks[node_iter->current_idx], 0);
+
+				break;
+			}
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+
+				node_iter->current_idx++;
+
+				if (node_iter->current_idx >= n16->n.count)
+					goto not_found;
+
+				slot = n16->slots[node_iter->current_idx];
+
+				/* Update the part of the key */
+				if (IS_LEAF_NODE(n16))
+					rt_iter_update_key(iter, n16->chunks[node_iter->current_idx], 0);
+
+				break;
+			}
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+
+				node_iter->current_idx++;
+
+				if (node_iter->current_idx >= n32->n.count)
+					goto not_found;
+
+				slot = n32->slots[node_iter->current_idx];
+
+				/* Update the part of the key */
+				if (IS_LEAF_NODE(n32))
+					rt_iter_update_key(iter, n32->chunks[node_iter->current_idx], 0);
+
+				break;
+			}
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+				int			i;
+
+				for (i = node_iter->current_idx + 1; i < 256; i++)
+				{
+					if (node_128_is_chunk_used(n128, i))
+						break;
+				}
+
+				if (i >= 256)
+					goto not_found;
+
+				node_iter->current_idx = i;
+				slot = node_128_get_chunk_slot(n128, i);
+
+				/* Update the part of the key */
+				if (IS_LEAF_NODE(n128))
+					rt_iter_update_key(iter, node_iter->current_idx, 0);
+
+				break;
+			}
+		case RT_NODE_KIND_256:
+			{
+				rt_node_256 *n256 = (rt_node_256 *) node;
+				int			i;
+
+				for (i = node_iter->current_idx + 1; i < 256; i++)
+				{
+					if (node_256_is_chunk_used(n256, i))
+						break;
+				}
+
+				if (i >= 256)
+					goto not_found;
+
+				node_iter->current_idx = i;
+				slot = n256->slots[i];
+
+				/* Update the part of the key */
+				if (IS_LEAF_NODE(n256))
+					rt_iter_update_key(iter, node_iter->current_idx, 0);
+
+				break;
+			}
+	}
+
+	*found_p = true;
+	return slot;
+
+not_found:
+	*found_p = false;
+	return (Datum) 0;
+}
+
+/*
+ * Initialize and update the node iteration struct with the given radix tree
+ * node. This function also updates the part of the key with the chunk of the
+ * given node.
+ */
+static void
+rt_store_iter_node(rt_iter *iter, rt_iter_node_data *node_iter,
+				   rt_node *node)
+{
+	node_iter->node = node;
+	node_iter->current_idx = -1;
+
+	rt_iter_update_key(iter, node->chunk, node->shift + RT_NODE_SPAN);
+}
+
+static inline void
+rt_iter_update_key(rt_iter *iter, uint8 chunk, uint8 shift)
+{
+	iter->key &= ~(((uint64) RT_CHUNK_MASK) << shift);
+	iter->key |= (((uint64) chunk) << shift);
+}
+
+/*
+ * Return the number of keys in the radix tree.
+ */
+uint64
+rt_num_entries(radix_tree *tree)
+{
+	return tree->num_keys;
+}
+
+/*
+ * Return the amount of memory used by the radix tree.
+ */
+uint64
+rt_memory_usage(radix_tree *tree)
+{
+	return tree->mem_used;
+}
+
+/*
+ * Verify the radix tree node.
+ */
+static void
+rt_verify_node(rt_node *node)
+{
+#ifdef USE_ASSERT_CHECKING
+	Assert(node->count >= 0);
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node;
+
+				/* Check if the chunks in the node are sorted */
+				for (int i = 1; i < n4->n.count; i++)
+					Assert(n4->chunks[i - 1] < n4->chunks[i]);
+
+				break;
+			}
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+
+				/* Check if the chunks in the node are sorted */
+				for (int i = 1; i < n16->n.count; i++)
+					Assert(n16->chunks[i - 1] < n16->chunks[i]);
+
+				break;
+			}
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+
+				/* Check if the chunks in the node are sorted */
+				for (int i = 1; i < n32->n.count; i++)
+					Assert(n32->chunks[i - 1] < n32->chunks[i]);
+
+				break;
+			}
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+				int			cnt = 0;
+
+				for (int i = 0; i < RT_NODE_MAX_SLOTS; i++)
+				{
+					if (!node_128_is_chunk_used(n128, i))
+						continue;
+
+					/* Check if the corresponding slot is used */
+					Assert(node_128_is_slot_used(n128, n128->slot_idxs[i]));
+
+					cnt++;
+				}
+
+				Assert(n128->n.count == cnt);
+				break;
+			}
+		case RT_NODE_KIND_256:
+			{
+				rt_node_256 *n256 = (rt_node_256 *) node;
+				int			cnt = 0;
+
+				for (int i = 0; i < RT_NODE_NSLOTS_BITS(RT_NODE_MAX_SLOTS); i++)
+					cnt += pg_popcount32(n256->isset[i]);
+
+				/* Check if the number of used chunk matches */
+				Assert(n256->n.count == cnt);
+
+				break;
+			}
+	}
+#endif
+}
+
+/***************** DEBUG FUNCTIONS *****************/
+#ifdef RT_DEBUG
+void
+rt_stats(radix_tree *tree)
+{
+	fprintf(stderr, "num_keys = %lu, height = %u, n4 = %u(%lu), n16 = %u(%lu), n32 = %u(%lu), n128 = %u(%lu), n256 = %u(%lu)\n",
+			tree->num_keys,
+			tree->root->shift / RT_NODE_SPAN,
+			tree->cnt[0], tree->cnt[0] * sizeof(rt_node_4),
+			tree->cnt[1], tree->cnt[1] * sizeof(rt_node_16),
+			tree->cnt[2], tree->cnt[2] * sizeof(rt_node_32),
+			tree->cnt[3], tree->cnt[3] * sizeof(rt_node_128),
+			tree->cnt[4], tree->cnt[4] * sizeof(rt_node_256));
+	/* rt_dump(tree); */
+}
+
+static void
+rt_print_slot(StringInfo buf, uint8 chunk, Datum slot, int idx, bool is_leaf, int level)
+{
+	char		space[128] = {0};
+
+	if (level > 0)
+		sprintf(space, "%*c", level * 4, ' ');
+
+	if (is_leaf)
+		appendStringInfo(buf, "%s[%d] \"0x%X\" val(0x%lX) LEAF\n",
+						 space,
+						 idx,
+						 chunk,
+						 DatumGetInt64(slot));
+	else
+		appendStringInfo(buf, "%s[%d] \"0x%X\" -> ",
+						 space,
+						 idx,
+						 chunk);
+}
+
+static void
+rt_dump_node(rt_node *node, int level, StringInfo buf, bool recurse)
+{
+	bool		is_leaf = IS_LEAF_NODE(node);
+
+	appendStringInfo(buf, "[\"%s\" type %d, cnt %u, shift %u, chunk \"0x%X\"] chunks:\n",
+					 IS_LEAF_NODE(node) ? "LEAF" : "INNR",
+					 (node->kind == RT_NODE_KIND_4) ? 4 :
+					 (node->kind == RT_NODE_KIND_16) ? 16 :
+					 (node->kind == RT_NODE_KIND_32) ? 32 :
+					 (node->kind == RT_NODE_KIND_128) ? 128 : 256,
+					 node->count, node->shift, node->chunk);
+
+	switch (node->kind)
+	{
+		case RT_NODE_KIND_4:
+			{
+				rt_node_4 *n4 = (rt_node_4 *) node;
+
+				for (int i = 0; i < n4->n.count; i++)
+				{
+					rt_print_slot(buf, n4->chunks[i], n4->slots[i], i, is_leaf, level);
+
+					if (!is_leaf)
+					{
+						if (recurse)
+						{
+							StringInfoData buf2;
+
+							initStringInfo(&buf2);
+							rt_dump_node((rt_node *) n4->slots[i], level + 1, &buf2, recurse);
+							appendStringInfo(buf, "%s", buf2.data);
+						}
+						else
+							appendStringInfo(buf, "\n");
+					}
+				}
+				break;
+			}
+		case RT_NODE_KIND_16:
+			{
+				rt_node_16 *n16 = (rt_node_16 *) node;
+
+				for (int i = 0; i < n16->n.count; i++)
+				{
+					rt_print_slot(buf, n16->chunks[i], n16->slots[i], i, is_leaf, level);
+
+					if (!is_leaf)
+					{
+						if (recurse)
+						{
+							StringInfoData buf2;
+
+							initStringInfo(&buf2);
+							rt_dump_node((rt_node *) n16->slots[i], level + 1, &buf2, recurse);
+							appendStringInfo(buf, "%s", buf2.data);
+						}
+						else
+							appendStringInfo(buf, "\n");
+					}
+				}
+				break;
+			}
+		case RT_NODE_KIND_32:
+			{
+				rt_node_32 *n32 = (rt_node_32 *) node;
+
+				for (int i = 0; i < n32->n.count; i++)
+				{
+					rt_print_slot(buf, n32->chunks[i], n32->slots[i], i, is_leaf, level);
+
+					if (!is_leaf)
+					{
+						if (recurse)
+						{
+							StringInfoData buf2;
+
+							initStringInfo(&buf2);
+							rt_dump_node((rt_node *) n32->slots[i], level + 1, &buf2, recurse);
+							appendStringInfo(buf, "%s", buf2.data);
+						}
+						else
+							appendStringInfo(buf, "\n");
+					}
+				}
+				break;
+			}
+		case RT_NODE_KIND_128:
+			{
+				rt_node_128 *n128 = (rt_node_128 *) node;
+
+				for (int j = 0; j < 256; j++)
+				{
+					if (!node_128_is_chunk_used(n128, j))
+						continue;
+
+					appendStringInfo(buf, "slot_idxs[%d]=%d, ", j, n128->slot_idxs[j]);
+				}
+				appendStringInfo(buf, "\nisset-bitmap:");
+				for (int j = 0; j < 16; j++)
+				{
+					appendStringInfo(buf, "%X ", (uint8) n128->isset[j]);
+				}
+				appendStringInfo(buf, "\n");
+
+				for (int i = 0; i < 256; i++)
+				{
+					if (!node_128_is_chunk_used(n128, i))
+						continue;
+
+					rt_print_slot(buf, i, node_128_get_chunk_slot(n128, i),
+										  i, is_leaf, level);
+
+					if (!is_leaf)
+					{
+						if (recurse)
+						{
+							StringInfoData buf2;
+
+							initStringInfo(&buf2);
+							rt_dump_node((rt_node *) node_128_get_chunk_slot(n128, i),
+												 level + 1, &buf2, recurse);
+							appendStringInfo(buf, "%s", buf2.data);
+						}
+						else
+							appendStringInfo(buf, "\n");
+					}
+				}
+				break;
+			}
+		case RT_NODE_KIND_256:
+			{
+				rt_node_256 *n256 = (rt_node_256 *) node;
+
+				for (int i = 0; i < 256; i++)
+				{
+					if (!node_256_is_chunk_used(n256, i))
+						continue;
+
+					rt_print_slot(buf, i, n256->slots[i], i, is_leaf, level);
+
+					if (!is_leaf)
+					{
+						if (recurse)
+						{
+							StringInfoData buf2;
+
+							initStringInfo(&buf2);
+							rt_dump_node((rt_node *) n256->slots[i], level + 1, &buf2, recurse);
+							appendStringInfo(buf, "%s", buf2.data);
+						}
+						else
+							appendStringInfo(buf, "\n");
+					}
+				}
+				break;
+			}
+	}
+}
+
+void
+rt_dump_search(radix_tree *tree, uint64 key)
+{
+	StringInfoData buf;
+	rt_node *node;
+	int			shift;
+	int			level = 0;
+
+	elog(NOTICE, "-----------------------------------------------------------");
+	elog(NOTICE, "max_val = %lu (0x%lX)", tree->max_val, tree->max_val);
+
+	if (!tree->root)
+	{
+		elog(NOTICE, "tree is empty");
+		return;
+	}
+
+	if (key > tree->max_val)
+	{
+		elog(NOTICE, "key %lu (0x%lX) is larger than max val",
+			 key, key);
+		return;
+	}
+
+	initStringInfo(&buf);
+	node = tree->root;
+	shift = tree->root->shift;
+	while (shift >= 0)
+	{
+		rt_node *child;
+
+		rt_dump_node(node, level, &buf, false);
+
+		if (IS_LEAF_NODE(node))
+		{
+			Datum	   *dummy;
+
+			/* We reached a leaf node, find the corresponding slot */
+			rt_node_search(node, &dummy, key, RT_ACTION_FIND);
+
+			break;
+		}
+
+		if (!rt_node_find_child(node, &child, key))
+			break;
+
+		node = child;
+		shift -= RT_NODE_SPAN;
+		level++;
+	}
+
+	elog(NOTICE, "\n%s", buf.data);
+}
+
+void
+rt_dump(radix_tree *tree)
+{
+	StringInfoData buf;
+
+	initStringInfo(&buf);
+
+	elog(NOTICE, "-----------------------------------------------------------");
+	elog(NOTICE, "max_val = %lu", tree->max_val);
+	rt_dump_node(tree->root, 0, &buf, true);
+	elog(NOTICE, "\n%s", buf.data);
+	elog(NOTICE, "-----------------------------------------------------------");
+}
+#endif
diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
new file mode 100644
index 0000000000..7efd4bb735
--- /dev/null
+++ b/src/include/lib/radixtree.h
@@ -0,0 +1,42 @@
+/*-------------------------------------------------------------------------
+ *
+ * radixtree.h
+ *	  Interface for radix tree.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/include/lib/radixtree.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef RADIXTREE_H
+#define RADIXTREE_H
+
+#include "postgres.h"
+
+/* #define RT_DEBUG 1 */
+
+typedef struct radix_tree radix_tree;
+typedef struct rt_iter rt_iter;
+
+extern radix_tree *rt_create(MemoryContext ctx);
+extern bool rt_search(radix_tree *tree, uint64 key, Datum *val_p);
+extern void rt_insert(radix_tree *tree, uint64 key, Datum val, bool *found_p);
+extern bool rt_delete(radix_tree *tree, uint64 key);
+extern void rt_free(radix_tree *tree);
+extern uint64 rt_memory_usage(radix_tree *tree);
+extern uint64 rt_num_entries(radix_tree *tree);
+
+extern rt_iter *rt_begin_iterate(radix_tree *tree);
+extern bool rt_iterate_next(rt_iter *iter, uint64 *key_p, Datum *value_p);
+extern void rt_end_iterate(rt_iter *iter);
+
+
+#ifdef RT_DEBUG
+extern void rt_dump(radix_tree *tree);
+extern void rt_dump_search(radix_tree *tree, uint64 key);
+extern void rt_stats(radix_tree *tree);
+#endif
+
+#endif							/* RADIXTREE_H */
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 9090226daa..51b2514faf 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -24,6 +24,7 @@ SUBDIRS = \
 		  test_parser \
 		  test_pg_dump \
 		  test_predtest \
+		  test_radixtree \
 		  test_rbtree \
 		  test_regex \
 		  test_rls_hooks \
diff --git a/src/test/modules/test_radixtree/.gitignore b/src/test/modules/test_radixtree/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/src/test/modules/test_radixtree/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_radixtree/Makefile b/src/test/modules/test_radixtree/Makefile
new file mode 100644
index 0000000000..da06b93da3
--- /dev/null
+++ b/src/test/modules/test_radixtree/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_radixtree/Makefile
+
+MODULE_big = test_radixtree
+OBJS = \
+	$(WIN32RES) \
+	test_radixtree.o
+PGFILEDESC = "test_radixtree - test code for src/backend/lib/radixtree.c"
+
+EXTENSION = test_radixtree
+DATA = test_radixtree--1.0.sql
+
+REGRESS = test_radixtree
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_radixtree
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_radixtree/README b/src/test/modules/test_radixtree/README
new file mode 100644
index 0000000000..a8b271869a
--- /dev/null
+++ b/src/test/modules/test_radixtree/README
@@ -0,0 +1,7 @@
+test_radixtree contains unit tests for the radix tree implementation in
+src/backend/lib/radixtree.c.
+
+The tests verify the correctness of the implementation, but they can also be
+used as a micro-benchmark.  If you set the 'rt_test_stats' flag in
+test_radixtree.c, the tests will print extra information about execution time
+and memory usage.
diff --git a/src/test/modules/test_radixtree/expected/test_radixtree.out b/src/test/modules/test_radixtree/expected/test_radixtree.out
new file mode 100644
index 0000000000..cc6970c87c
--- /dev/null
+++ b/src/test/modules/test_radixtree/expected/test_radixtree.out
@@ -0,0 +1,28 @@
+CREATE EXTENSION test_radixtree;
+--
+-- All the logic is in the test_radixtree() function. It will throw
+-- an error if something fails.
+--
+SELECT test_radixtree();
+NOTICE:  testing radix tree node types with shift "0"
+NOTICE:  testing radix tree node types with shift "8"
+NOTICE:  testing radix tree node types with shift "16"
+NOTICE:  testing radix tree node types with shift "24"
+NOTICE:  testing radix tree node types with shift "32"
+NOTICE:  testing radix tree node types with shift "40"
+NOTICE:  testing radix tree node types with shift "48"
+NOTICE:  testing radix tree node types with shift "56"
+NOTICE:  testing radix tree with pattern "all ones"
+NOTICE:  testing radix tree with pattern "alternating bits"
+NOTICE:  testing radix tree with pattern "clusters of ten"
+NOTICE:  testing radix tree with pattern "clusters of hundred"
+NOTICE:  testing radix tree with pattern "one-every-64k"
+NOTICE:  testing radix tree with pattern "sparse"
+NOTICE:  testing radix tree with pattern "single values, distance > 2^32"
+NOTICE:  testing radix tree with pattern "clusters, distance > 2^32"
+NOTICE:  testing radix tree with pattern "clusters, distance > 2^60"
+ test_radixtree 
+----------------
+ 
+(1 row)
+
diff --git a/src/test/modules/test_radixtree/sql/test_radixtree.sql b/src/test/modules/test_radixtree/sql/test_radixtree.sql
new file mode 100644
index 0000000000..41ece5e9f5
--- /dev/null
+++ b/src/test/modules/test_radixtree/sql/test_radixtree.sql
@@ -0,0 +1,7 @@
+CREATE EXTENSION test_radixtree;
+
+--
+-- All the logic is in the test_radixtree() function. It will throw
+-- an error if something fails.
+--
+SELECT test_radixtree();
diff --git a/src/test/modules/test_radixtree/test_radixtree--1.0.sql b/src/test/modules/test_radixtree/test_radixtree--1.0.sql
new file mode 100644
index 0000000000..074a5a7ea7
--- /dev/null
+++ b/src/test/modules/test_radixtree/test_radixtree--1.0.sql
@@ -0,0 +1,8 @@
+/* src/test/modules/test_radixtree/test_radixtree--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_radixtree" to load this file. \quit
+
+CREATE FUNCTION test_radixtree()
+RETURNS pg_catalog.void STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/src/test/modules/test_radixtree/test_radixtree.c b/src/test/modules/test_radixtree/test_radixtree.c
new file mode 100644
index 0000000000..384b1fc41d
--- /dev/null
+++ b/src/test/modules/test_radixtree/test_radixtree.c
@@ -0,0 +1,503 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_radixtree.c
+ *		Test the radix tree data structure.
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/test/modules/test_radixtree/test_radixtree.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/pg_prng.h"
+#include "fmgr.h"
+#include "lib/radixtree.h"
+#include "miscadmin.h"
+#include "nodes/bitmapset.h"
+#include "storage/block.h"
+#include "storage/itemptr.h"
+#include "utils/memutils.h"
+#include "utils/timestamp.h"
+
+#define UINT64_HEX_FORMAT "%" INT64_MODIFIER "X"
+
+/*
+ * If you enable this, the "pattern" tests will print information about
+ * how long populating, probing, and iterating the test set takes, and
+ * how much memory the test set consumed.  That can be used as a
+ * micro-benchmark of various operations and input patterns (if you do
+ * that, you might want to increase the number of values used in each of
+ * the tests to reduce noise).
+ *
+ * The information is printed to the server's stderr, mostly because
+ * that's where MemoryContextStats() output goes.
+ */
+static const bool rt_test_stats = false;
+
+/* The maximum number of entries each node type can have */
+static int rt_node_max_entries[] = {
+	4,		/* RT_NODE_KIND_4 */
+	16,		/* RT_NODE_KIND_16 */
+	32,		/* RT_NODE_KIND_32 */
+	128,	/* RT_NODE_KIND_128 */
+	256		/* RT_NODE_KIND_256 */
+};
+
+/*
+ * A struct to define a pattern of integers, for use with the test_pattern()
+ * function.
+ */
+typedef struct
+{
+	char	   *test_name;		/* short name of the test, for humans */
+	char	   *pattern_str;	/* a bit pattern */
+	uint64		spacing;		/* pattern repeats at this interval */
+	uint64		num_values;		/* number of integers to set in total */
+} test_spec;
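+
+/*
+ * For example (illustrative only): pattern_str = "101" with spacing = 10
+ * sets the keys 0, 2, 10, 12, 20, 22, ... -- the positions of the '1'
+ * characters within the pattern, with each repetition of the pattern
+ * starting 'spacing' keys after the previous one -- until num_values keys
+ * have been inserted in total.
+ */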
+
+/* Test patterns borrowed from test_integerset.c */
+static const test_spec test_specs[] = {
+	{
+		"all ones", "1111111111",
+		10, 1000000
+	},
+	{
+		"alternating bits", "0101010101",
+		10, 1000000
+	},
+	{
+		"clusters of ten", "1111111111",
+		10000, 1000000
+	},
+	{
+		"clusters of hundred",
+		"1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
+		10000, 10000000
+	},
+	{
+		"one-every-64k", "1",
+		65536, 1000000
+	},
+	{
+		"sparse", "100000000000000000000000000000001",
+		10000000, 1000000
+	},
+	{
+		"single values, distance > 2^32", "1",
+		UINT64CONST(10000000000), 100000
+	},
+	{
+		"clusters, distance > 2^32", "10101010",
+		UINT64CONST(10000000000), 1000000
+	},
+	{
+		"clusters, distance > 2^60", "10101010",
+		UINT64CONST(2000000000000000000),
+		23						/* can't be much higher than this, or we
+								 * overflow uint64 */
+	}
+};
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(test_radixtree);
+
+static void
+test_empty(void)
+{
+	radix_tree *radixtree;
+	Datum dummy;
+
+	radixtree = rt_create(CurrentMemoryContext);
+
+	if (rt_search(radixtree, 0, &dummy))
+		elog(ERROR, "rt_search on empty tree returned true");
+
+	if (rt_search(radixtree, 1, &dummy))
+		elog(ERROR, "rt_search on empty tree returned true");
+
+	if (rt_search(radixtree, PG_UINT64_MAX, &dummy))
+		elog(ERROR, "rt_search on empty tree returned true");
+
+	if (rt_num_entries(radixtree) != 0)
+		elog(ERROR, "rt_num_entries on empty tree returned non-zero");
+
+	rt_free(radixtree);
+}
+
+/*
+ * Check that the keys in [start, end), shifted left by 'shift', exist in the tree.
+ */
+static void
+check_search_on_node(radix_tree *radixtree, uint8 shift, int start, int end)
+{
+	for (int i = start; i < end; i++)
+	{
+		uint64 key = ((uint64) i << shift);
+		Datum val;
+
+		if (!rt_search(radixtree, key, &val))
+			elog(ERROR, "key 0x" UINT64_HEX_FORMAT " is not found on node-%d",
+				 key, end);
+		if (DatumGetUInt64(val) != key)
+			elog(ERROR, "rt_search with key 0x" UINT64_HEX_FORMAT " returns 0x" UINT64_HEX_FORMAT ", expected 0x" UINT64_HEX_FORMAT,
+				 key, DatumGetUInt64(val), key);
+	}
+}
+
+static void
+test_node_types_insert(radix_tree *radixtree, uint8 shift)
+{
+	uint64 num_entries;
+
+	for (int i = 0; i < 256; i++)
+	{
+		uint64 key = ((uint64) i << shift);
+		bool found;
+
+		rt_insert(radixtree, key, Int64GetDatum(key), &found);
+
+		if (found)
+			elog(ERROR, "newly inserted key 0x" UINT64_HEX_FORMAT " found", key);
+
+		for (int j = 0; j < lengthof(rt_node_max_entries); j++)
+		{
+			/*
+			 * After filling all slots in each node type, check if the values are
+			 * stored properly.
+			 */
+			if (i == (rt_node_max_entries[j] - 1))
+			{
+				check_search_on_node(radixtree, shift,
+									 (j == 0) ? 0 : rt_node_max_entries[j - 1],
+									 rt_node_max_entries[j]);
+				break;
+			}
+		}
+	}
+
+	num_entries = rt_num_entries(radixtree);
+
+	if (num_entries != 256)
+		elog(ERROR,
+			 "rt_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT,
+			 num_entries, UINT64CONST(256));
+}
+
+static void
+test_node_types_delete(radix_tree *radixtree, uint8 shift)
+{
+	uint64 num_entries;
+
+	for (int i = 0; i < 256; i++)
+	{
+		uint64	key = ((uint64) i << shift);
+		bool	found;
+
+		found = rt_delete(radixtree, key);
+
+		if (!found)
+			elog(ERROR, "inserted key 0x" UINT64_HEX_FORMAT " is not found", key);
+	}
+
+	num_entries = rt_num_entries(radixtree);
+
+	/* The tree must be empty */
+	if (num_entries != 0)
+		elog(ERROR,
+			 "rt_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT,
+			 num_entries, UINT64CONST(0));
+}
+
+/*
+ * Test insertion and deletion of key-value pairs for each node type at the
+ * given shift level.
+ */
+static void
+test_node_types(uint8 shift)
+{
+	radix_tree *radixtree;
+
+	elog(NOTICE, "testing radix tree node types with shift \"%d\"", shift);
+
+	radixtree = rt_create(CurrentMemoryContext);
+
+	/*
+	 * Insert and search entries for every node type at the 'shift' level,
+	 * then delete all entries to make it empty, and insert and search
+	 * entries again.
+	 */
+	test_node_types_insert(radixtree, shift);
+	test_node_types_delete(radixtree, shift);
+	test_node_types_insert(radixtree, shift);
+
+	rt_free(radixtree);
+}
+
+/*
+ * Test with a repeating pattern, defined by the 'spec'.
+ */
+static void
+test_pattern(const test_spec *spec)
+{
+	radix_tree *radixtree;
+	rt_iter *iter;
+	MemoryContext radixtree_ctx;
+	TimestampTz starttime;
+	TimestampTz endtime;
+	uint64		n;
+	uint64		last_int;
+	uint64		ndeleted;
+	uint64		nbefore;
+	uint64		nafter;
+	int			patternlen;
+	uint64	   *pattern_values;
+	uint64		pattern_num_values;
+
+	elog(NOTICE, "testing radix tree with pattern \"%s\"", spec->test_name);
+	if (rt_test_stats)
+		fprintf(stderr, "-----\ntesting radix tree with pattern \"%s\"\n", spec->test_name);
+
+	/* Pre-process the pattern, creating an array of integers from it. */
+	patternlen = strlen(spec->pattern_str);
+	pattern_values = palloc(patternlen * sizeof(uint64));
+	pattern_num_values = 0;
+	for (int i = 0; i < patternlen; i++)
+	{
+		if (spec->pattern_str[i] == '1')
+			pattern_values[pattern_num_values++] = i;
+	}
+
+	/*
+	 * Allocate the radix tree.
+	 *
+	 * Allocate it in a separate memory context, so that we can print its
+	 * memory usage easily.
+	 */
+	radixtree_ctx = AllocSetContextCreate(CurrentMemoryContext,
+										  "radixtree test",
+										  ALLOCSET_SMALL_SIZES);
+	MemoryContextSetIdentifier(radixtree_ctx, spec->test_name);
+	radixtree = rt_create(radixtree_ctx);
+
+	/*
+	 * Add values to the set.
+	 */
+	starttime = GetCurrentTimestamp();
+
+	n = 0;
+	last_int = 0;
+	while (n < spec->num_values)
+	{
+		uint64		x = 0;
+
+		for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
+		{
+			bool found;
+
+			x = last_int + pattern_values[i];
+
+			rt_insert(radixtree, x, Int64GetDatum(x), &found);
+
+			if (found)
+				elog(ERROR, "newly inserted key 0x" UINT64_HEX_FORMAT " found", x);
+
+			n++;
+		}
+		last_int += spec->spacing;
+	}
+
+	endtime = GetCurrentTimestamp();
+
+	if (rt_test_stats)
+		fprintf(stderr, "added " UINT64_FORMAT " values in %d ms\n",
+				spec->num_values, (int) (endtime - starttime) / 1000);
+
+	/*
+	 * Print stats on the amount of memory used.
+	 *
+	 * We print the usage reported by rt_memory_usage(), as well as the
+	 * stats from the memory context.  They should be in the same ballpark,
+	 * but it's hard to automate testing that, so if you're making changes to
+	 * the implementation, just observe that manually.
+	 */
+	if (rt_test_stats)
+	{
+		uint64		mem_usage;
+
+		/*
+		 * Also print memory usage as reported by rt_memory_usage().  It
+		 * should be in the same ballpark as the usage reported by
+		 * MemoryContextStats().
+		 */
+		mem_usage = rt_memory_usage(radixtree);
+		fprintf(stderr, "rt_memory_usage() reported " UINT64_FORMAT " (%0.2f bytes / integer)\n",
+				mem_usage, (double) mem_usage / spec->num_values);
+
+		MemoryContextStats(radixtree_ctx);
+	}
+
+	/* Check that rt_num_entries works */
+	n = rt_num_entries(radixtree);
+	if (n != spec->num_values)
+		elog(ERROR, "rt_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT, n, spec->num_values);
+
+	/*
+	 * Test random-access probes with rt_search()
+	 */
+	starttime = GetCurrentTimestamp();
+
+	for (n = 0; n < 100000; n++)
+	{
+		bool		found;
+		bool		expected;
+		uint64		x;
+		Datum		v;
+
+		/*
+		 * Pick next value to probe at random.  We limit the probes to the
+		 * last integer that we added to the set, plus an arbitrary constant
+		 * (1000).  There's no point in probing the whole 0 - 2^64 range, if
+		 * only a small part of the integer space is used.  We would very
+		 * rarely hit values that are actually in the set.
+		 */
+		x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
+
+		/* Do we expect this value to be present in the set? */
+		if (x >= last_int)
+			expected = false;
+		else
+		{
+			uint64		idx = x % spec->spacing;
+
+			if (idx >= patternlen)
+				expected = false;
+			else if (spec->pattern_str[idx] == '1')
+				expected = true;
+			else
+				expected = false;
+		}
+
+		/* Is it present according to rt_search() ? */
+		found = rt_search(radixtree, x, &v);
+
+		if (found != expected)
+			elog(ERROR, "mismatch at 0x" UINT64_HEX_FORMAT ": %d vs %d", x, found, expected);
+		if (found && (DatumGetUInt64(v) != x))
+			elog(ERROR, "found 0x" UINT64_HEX_FORMAT ", expected 0x" UINT64_HEX_FORMAT,
+				 DatumGetUInt64(v), x);
+	}
+	endtime = GetCurrentTimestamp();
+	if (rt_test_stats)
+		fprintf(stderr, "probed " UINT64_FORMAT " values in %d ms\n",
+				n, (int) (endtime - starttime) / 1000);
+
+	/*
+	 * Test iterator
+	 */
+	starttime = GetCurrentTimestamp();
+
+	iter = rt_begin_iterate(radixtree);
+	n = 0;
+	last_int = 0;
+	while (n < spec->num_values)
+	{
+		for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
+		{
+			uint64		expected = last_int + pattern_values[i];
+			uint64		x;
+			Datum		val;
+
+			if (!rt_iterate_next(iter, &x, &val))
+				break;
+
+			if (x != expected)
+				elog(ERROR,
+					 "iterate returned wrong key; got 0x" UINT64_HEX_FORMAT ", expected 0x" UINT64_HEX_FORMAT " at %d", x, expected, i);
+			if (DatumGetUInt64(val) != expected)
+				elog(ERROR,
+					 "iterate returned wrong value; got 0x" UINT64_HEX_FORMAT ", expected 0x" UINT64_HEX_FORMAT " at %d", DatumGetUInt64(val), expected, i);
+			n++;
+		}
+		last_int += spec->spacing;
+	}
+	endtime = GetCurrentTimestamp();
+	if (rt_test_stats)
+		fprintf(stderr, "iterated " UINT64_FORMAT " values in %d ms\n",
+				n, (int) (endtime - starttime) / 1000);
+
+	if (n < spec->num_values)
+		elog(ERROR, "iterator stopped short after " UINT64_FORMAT " entries, expected " UINT64_FORMAT, n, spec->num_values);
+	if (n > spec->num_values)
+		elog(ERROR, "iterator returned " UINT64_FORMAT " entries, " UINT64_FORMAT " was expected", n, spec->num_values);
+
+	/*
+	 * Test random-access probes with rt_delete()
+	 */
+	starttime = GetCurrentTimestamp();
+
+	nbefore = rt_num_entries(radixtree);
+	ndeleted = 0;
+	for (n = 0; n < 100000; n++)
+	{
+		bool		found;
+		uint64		x;
+		Datum		v;
+
+		/*
+		 * Pick next value to probe at random.  We limit the probes to the
+		 * last integer that we added to the set, plus an arbitrary constant
+		 * (1000).  There's no point in probing the whole 0 - 2^64 range, if
+		 * only a small part of the integer space is used.  We would very
+		 * rarely hit values that are actually in the set.
+		 */
+		x = pg_prng_uint64_range(&pg_global_prng_state, 0, last_int + 1000);
+
+		/* Is it present according to rt_search() ? */
+		found = rt_search(radixtree, x, &v);
+
+		if (!found)
+			continue;
+
+		/* If the key is found, delete it and check again */
+		if (!rt_delete(radixtree, x))
+			elog(ERROR, "could not delete key 0x" UINT64_HEX_FORMAT, x);
+		if (rt_search(radixtree, x, &v))
+			elog(ERROR, "found deleted key 0x" UINT64_HEX_FORMAT, x);
+		if (rt_delete(radixtree, x))
+			elog(ERROR, "deleted already-deleted key 0x" UINT64_HEX_FORMAT, x);
+
+		ndeleted++;
+	}
+	endtime = GetCurrentTimestamp();
+	if (rt_test_stats)
+		fprintf(stderr, "deleted " UINT64_FORMAT " values in %d ms\n",
+				ndeleted, (int) (endtime - starttime) / 1000);
+
+	nafter = rt_num_entries(radixtree);
+
+	/* Check that rt_num_entries works */
+	if ((nbefore - ndeleted) != nafter)
+		elog(ERROR, "rt_num_entries returned " UINT64_FORMAT ", expected " UINT64_FORMAT " after " UINT64_FORMAT " deletions",
+			 nafter, (nbefore - ndeleted), ndeleted);
+
+	MemoryContextDelete(radixtree_ctx);
+}
+
+Datum
+test_radixtree(PG_FUNCTION_ARGS)
+{
+	test_empty();
+
+	for (int shift = 0; shift <= (64 - 8); shift += 8)
+		test_node_types(shift);
+
+	/* Test different test patterns, with lots of entries */
+	for (int i = 0; i < lengthof(test_specs); i++)
+		test_pattern(&test_specs[i]);
+
+	PG_RETURN_VOID();
+}
diff --git a/src/test/modules/test_radixtree/test_radixtree.control b/src/test/modules/test_radixtree/test_radixtree.control
new file mode 100644
index 0000000000..e53f2a3e0c
--- /dev/null
+++ b/src/test/modules/test_radixtree/test_radixtree.control
@@ -0,0 +1,4 @@
+comment = 'Test code for radix tree'
+default_version = '1.0'
+module_pathname = '$libdir/test_radixtree'
+relocatable = true
