From bc4f8f9b43dc050ac2fa92d0770eb63c822838b7 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Tue, 27 Jun 2023 15:59:23 +0200
Subject: [PATCH v1 1/2] Expose f_smgr to extensions for manual implementation

There are various reasons why one would want to create their own
implementation of a storage manager, among which are block-level compression,
encryption and offloading to cold storage. This is the first patch of the
series; it allows extensions to register their own SMgr.

Note, however, that extension-registered SMgrs are not yet used - only the
first SMgr to register is used, and this is currently the md.c smgr. Future
commits will include facilities to select an SMgr for each tablespace.
---
 src/backend/postmaster/postmaster.c |   5 +
 src/backend/storage/smgr/md.c       | 164 ++++++++++++++++++----------
 src/backend/storage/smgr/smgr.c     | 126 ++++++++++-----------
 src/backend/utils/init/miscinit.c   |  12 ++
 src/include/miscadmin.h             |   1 +
 src/include/storage/md.h            |   4 +
 src/include/storage/smgr.h          |  56 ++++++++--
 7 files changed, 242 insertions(+), 126 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 4c49393fc5..8685b9fde6 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1002,6 +1002,11 @@ PostmasterMain(int argc, char *argv[])
 	 */
 	ApplyLauncherRegister();
 
+	/*
+	 * Register built-in managers that are not part of static arrays
+	 */
+	register_builtin_dynamic_managers();
+
 	/*
 	 * process any libraries that should be preloaded at postmaster start
 	 */
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 30dbc02f82..690bdd27c5 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -86,6 +86,21 @@ typedef struct _MdfdVec
 } MdfdVec;
 
 static MemoryContext MdCxt;		/* context for all MdfdVec objects */
+SMgrId MdSMgrId;
+
+typedef struct MdSMgrRelationData
+{
+	/* generic smgr state; kept first so SMgrRelation pointers can be cast */
+	SMgrRelationData reln;
+	/*
+	 * md.c private state: per-fork arrays of the number of open segments
+	 * (md_num_open_segs) and the open segments themselves (md_seg_fds).
+	 */
+	int			md_num_open_segs[MAX_FORKNUM + 1];
+	struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
+} MdSMgrRelationData;
+
+typedef MdSMgrRelationData *MdSMgrRelation;
 
 
 /* Populate a file tag describing an md.c segment file. */
@@ -120,26 +135,52 @@ static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 #define EXTENSION_DONT_OPEN			(1 << 5)
 
 
+/* mdsmgr_register() -- register the magnetic disk manager with smgr.c */
+void
+mdsmgr_register(void)
+{
+	const f_smgr md_callbacks = {
+		.name = "md",
+		.smgr_init = mdinit,		/* smgr_shutdown is left NULL */
+		.smgr_open = mdopen,
+		.smgr_close = mdclose,
+		.smgr_create = mdcreate,
+		.smgr_exists = mdexists,
+		.smgr_unlink = mdunlink,
+		.smgr_extend = mdextend,
+		.smgr_zeroextend = mdzeroextend,
+		.smgr_prefetch = mdprefetch,
+		.smgr_read = mdread,
+		.smgr_write = mdwrite,
+		.smgr_writeback = mdwriteback,
+		.smgr_nblocks = mdnblocks,
+		.smgr_truncate = mdtruncate,
+		.smgr_immedsync = mdimmedsync,
+	};
+
+	MdSMgrId = smgr_register(&md_callbacks, sizeof(MdSMgrRelationData));
+}
+
 /* local routines */
 static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum,
 						 bool isRedo);
-static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior);
-static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
+static MdfdVec *mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior);
+static void register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum,
 								   MdfdVec *seg);
 static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum,
 									BlockNumber segno);
 static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum,
 									BlockNumber segno);
-static void _fdvec_resize(SMgrRelation reln,
+static void _fdvec_resize(MdSMgrRelation reln,
 						  ForkNumber forknum,
 						  int nseg);
-static char *_mdfd_segpath(SMgrRelation reln, ForkNumber forknum,
+static char *_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum,
 						   BlockNumber segno);
-static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum,
+static MdfdVec *_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum,
 							  BlockNumber segno, int oflags);
-static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum,
+static MdfdVec *_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum,
 							 BlockNumber blkno, bool skipFsync, int behavior);
-static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
+static BlockNumber _mdnblocks(MdSMgrRelation reln, ForkNumber forknum,
 							  MdfdVec *seg);
 
 static inline int
@@ -194,11 +235,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 	MdfdVec    *mdfd;
 	char	   *path;
 	File		fd;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
+	Assert(reln->smgr_which == MdSMgrId);
 
-	if (isRedo && reln->md_num_open_segs[forknum] > 0)
+	if (isRedo && mdreln->md_num_open_segs[forknum] > 0)
 		return;					/* created and opened already... */
 
-	Assert(reln->md_num_open_segs[forknum] == 0);
+	Assert(mdreln->md_num_open_segs[forknum] == 0);
 
 	/*
 	 * We may be using the target table space for the first time in this
@@ -235,8 +278,8 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo)
 
 	pfree(path);
 
-	_fdvec_resize(reln, forknum, 1);
-	mdfd = &reln->md_seg_fds[forknum][0];
+	_fdvec_resize(mdreln, forknum, 1);
+	mdfd = &mdreln->md_seg_fds[forknum][0];
 	mdfd->mdfd_vfd = fd;
 	mdfd->mdfd_segno = 0;
 }
@@ -462,6 +505,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	/* If this build supports direct I/O, the buffer must be I/O aligned. */
 	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
@@ -485,7 +529,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 						relpath(reln->smgr_rlocator, forknum),
 						InvalidBlockNumber)));
 
-	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
+	v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
 
@@ -509,9 +553,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	}
 
 	if (!skipFsync && !SmgrIsTemp(reln))
-		register_dirty_segment(reln, forknum, v);
+		register_dirty_segment(mdreln, forknum, v);
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
 }
 
 /*
@@ -527,6 +571,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 	MdfdVec    *v;
 	BlockNumber curblocknum = blocknum;
 	int			remblocks = nblocks;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	Assert(nblocks > 0);
 
@@ -558,7 +603,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		else
 			numblocks = remblocks;
 
-		v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
+		v = _mdfd_getseg(mdreln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
 
 		Assert(segstartblock < RELSEG_SIZE);
 		Assert(segstartblock + numblocks <= RELSEG_SIZE);
@@ -613,9 +658,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		}
 
 		if (!skipFsync && !SmgrIsTemp(reln))
-			register_dirty_segment(reln, forknum, v);
+			register_dirty_segment(mdreln, forknum, v);
 
-		Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+		Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
 
 		remblocks -= numblocks;
 		curblocknum += numblocks;
@@ -633,7 +678,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
  * invent one out of whole cloth.
  */
 static MdfdVec *
-mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
+mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior)
 {
 	MdfdVec    *mdfd;
 	char	   *path;
@@ -643,7 +688,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
 	if (reln->md_num_open_segs[forknum] > 0)
 		return &reln->md_seg_fds[forknum][0];
 
-	path = relpath(reln->smgr_rlocator, forknum);
+	path = relpath(reln->reln.smgr_rlocator, forknum);
 
 	fd = PathNameOpenFile(path, _mdfd_open_flags());
 
@@ -678,9 +723,10 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
 void
 mdopen(SMgrRelation reln)
 {
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 	/* mark it not open */
 	for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++)
-		reln->md_num_open_segs[forknum] = 0;
+		mdreln->md_num_open_segs[forknum] = 0;
 }
 
 /*
@@ -689,7 +735,8 @@ mdopen(SMgrRelation reln)
 void
 mdclose(SMgrRelation reln, ForkNumber forknum)
 {
-	int			nopensegs = reln->md_num_open_segs[forknum];
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
+	int			nopensegs = mdreln->md_num_open_segs[forknum];
 
 	/* No work if already closed */
 	if (nopensegs == 0)
@@ -698,10 +745,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
 	/* close segments starting from the end */
 	while (nopensegs > 0)
 	{
-		MdfdVec    *v = &reln->md_seg_fds[forknum][nopensegs - 1];
+		MdfdVec    *v = &mdreln->md_seg_fds[forknum][nopensegs - 1];
 
 		FileClose(v->mdfd_vfd);
-		_fdvec_resize(reln, forknum, nopensegs - 1);
+		_fdvec_resize(mdreln, forknum, nopensegs - 1);
 		nopensegs--;
 	}
 }
@@ -715,10 +762,11 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 #ifdef USE_PREFETCH
 	off_t		seekpos;
 	MdfdVec    *v;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
 
-	v = _mdfd_getseg(reln, forknum, blocknum, false,
+	v = _mdfd_getseg(mdreln, forknum, blocknum, false,
 					 InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL);
 	if (v == NULL)
 		return false;
@@ -743,6 +791,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	/* If this build supports direct I/O, the buffer must be I/O aligned. */
 	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
@@ -754,7 +803,7 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 										reln->smgr_rlocator.locator.relNumber,
 										reln->smgr_rlocator.backend);
 
-	v = _mdfd_getseg(reln, forknum, blocknum, false,
+	v = _mdfd_getseg(mdreln, forknum, blocknum, false,
 					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -812,6 +861,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	off_t		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	/* If this build supports direct I/O, the buffer must be I/O aligned. */
 	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
@@ -828,7 +878,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 										 reln->smgr_rlocator.locator.relNumber,
 										 reln->smgr_rlocator.backend);
 
-	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
+	v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync,
 					 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
 	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
@@ -863,7 +913,7 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	}
 
 	if (!skipFsync && !SmgrIsTemp(reln))
-		register_dirty_segment(reln, forknum, v);
+		register_dirty_segment(mdreln, forknum, v);
 }
 
 /*
@@ -876,6 +926,7 @@ void
 mdwriteback(SMgrRelation reln, ForkNumber forknum,
 			BlockNumber blocknum, BlockNumber nblocks)
 {
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 	Assert((io_direct_flags & IO_DIRECT_DATA) == 0);
 
 	/*
@@ -890,7 +941,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 		int			segnum_start,
 					segnum_end;
 
-		v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ ,
+		v = _mdfd_getseg(mdreln, forknum, blocknum, true /* not used */ ,
 						 EXTENSION_DONT_OPEN);
 
 		/*
@@ -937,11 +988,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 	MdfdVec    *v;
 	BlockNumber nblocks;
 	BlockNumber segno;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
-	mdopenfork(reln, forknum, EXTENSION_FAIL);
+	mdopenfork(mdreln, forknum, EXTENSION_FAIL);
 
 	/* mdopen has opened the first segment */
-	Assert(reln->md_num_open_segs[forknum] > 0);
+	Assert(mdreln->md_num_open_segs[forknum] > 0);
 
 	/*
 	 * Start from the last open segments, to avoid redundant seeks.  We have
@@ -956,12 +1008,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 	 * that's OK because the checkpointer never needs to compute relation
 	 * size.)
 	 */
-	segno = reln->md_num_open_segs[forknum] - 1;
-	v = &reln->md_seg_fds[forknum][segno];
+	segno = mdreln->md_num_open_segs[forknum] - 1;
+	v = &mdreln->md_seg_fds[forknum][segno];
 
 	for (;;)
 	{
-		nblocks = _mdnblocks(reln, forknum, v);
+		nblocks = _mdnblocks(mdreln, forknum, v);
 		if (nblocks > ((BlockNumber) RELSEG_SIZE))
 			elog(FATAL, "segment too big");
 		if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -979,7 +1031,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 		 * undermines _mdfd_getseg's attempts to notice and report an error
 		 * upon access to a missing segment.
 		 */
-		v = _mdfd_openseg(reln, forknum, segno, 0);
+		v = _mdfd_openseg(mdreln, forknum, segno, 0);
 		if (v == NULL)
 			return segno * ((BlockNumber) RELSEG_SIZE);
 	}
@@ -994,6 +1046,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	BlockNumber curnblk;
 	BlockNumber priorblocks;
 	int			curopensegs;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	/*
 	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
@@ -1017,14 +1070,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	 * Truncate segments, starting at the last one. Starting at the end makes
 	 * managing the memory for the fd array easier, should there be errors.
 	 */
-	curopensegs = reln->md_num_open_segs[forknum];
+	curopensegs = mdreln->md_num_open_segs[forknum];
 	while (curopensegs > 0)
 	{
 		MdfdVec    *v;
 
 		priorblocks = (curopensegs - 1) * RELSEG_SIZE;
 
-		v = &reln->md_seg_fds[forknum][curopensegs - 1];
+		v = &mdreln->md_seg_fds[forknum][curopensegs - 1];
 
 		if (priorblocks > nblocks)
 		{
@@ -1039,13 +1092,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 								FilePathName(v->mdfd_vfd))));
 
 			if (!SmgrIsTemp(reln))
-				register_dirty_segment(reln, forknum, v);
+				register_dirty_segment(mdreln, forknum, v);
 
 			/* we never drop the 1st segment */
-			Assert(v != &reln->md_seg_fds[forknum][0]);
+			Assert(v != &mdreln->md_seg_fds[forknum][0]);
 
 			FileClose(v->mdfd_vfd);
-			_fdvec_resize(reln, forknum, curopensegs - 1);
+			_fdvec_resize(mdreln, forknum, curopensegs - 1);
 		}
 		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
 		{
@@ -1065,7 +1118,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 								FilePathName(v->mdfd_vfd),
 								nblocks)));
 			if (!SmgrIsTemp(reln))
-				register_dirty_segment(reln, forknum, v);
+				register_dirty_segment(mdreln, forknum, v);
 		}
 		else
 		{
@@ -1095,6 +1148,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 {
 	int			segno;
 	int			min_inactive_seg;
+	MdSMgrRelation mdreln = (MdSMgrRelation) reln;
 
 	/*
 	 * NOTE: mdnblocks makes sure we have opened all active segments, so that
@@ -1102,7 +1156,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 	 */
 	mdnblocks(reln, forknum);
 
-	min_inactive_seg = segno = reln->md_num_open_segs[forknum];
+	min_inactive_seg = segno = mdreln->md_num_open_segs[forknum];
 
 	/*
 	 * Temporarily open inactive segments, then close them after sync.  There
@@ -1110,12 +1164,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 	 * is harmless.  We don't bother to clean them up and take a risk of
 	 * further trouble.  The next mdclose() will soon close them.
 	 */
-	while (_mdfd_openseg(reln, forknum, segno, 0) != NULL)
+	while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL)
 		segno++;
 
 	while (segno > 0)
 	{
-		MdfdVec    *v = &reln->md_seg_fds[forknum][segno - 1];
+		MdfdVec    *v = &mdreln->md_seg_fds[forknum][segno - 1];
 
 		/*
 		 * fsyncs done through mdimmedsync() should be tracked in a separate
@@ -1136,7 +1190,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
 		if (segno > min_inactive_seg)
 		{
 			FileClose(v->mdfd_vfd);
-			_fdvec_resize(reln, forknum, segno - 1);
+			_fdvec_resize(mdreln, forknum, segno - 1);
 		}
 
 		segno--;
@@ -1153,14 +1207,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
  * enough to be a performance problem).
  */
 static void
-register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
+register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 {
 	FileTag		tag;
 
-	INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno);
+	INIT_MD_FILETAG(tag, reln->reln.smgr_rlocator.locator, forknum, seg->mdfd_segno);
 
 	/* Temp relations should never be fsync'd */
-	Assert(!SmgrIsTemp(reln));
+	Assert(!SmgrIsTemp(&reln->reln));
 
 	if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ ))
 	{
@@ -1278,7 +1332,7 @@ DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo)
  * _fdvec_resize() -- Resize the fork's open segments array
  */
 static void
-_fdvec_resize(SMgrRelation reln,
+_fdvec_resize(MdSMgrRelation reln,
 			  ForkNumber forknum,
 			  int nseg)
 {
@@ -1316,12 +1370,12 @@ _fdvec_resize(SMgrRelation reln,
  * returned string is palloc'd.
  */
 static char *
-_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
+_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno)
 {
 	char	   *path,
 			   *fullpath;
 
-	path = relpath(reln->smgr_rlocator, forknum);
+	path = relpath(reln->reln.smgr_rlocator, forknum);
 
 	if (segno > 0)
 	{
@@ -1339,7 +1393,7 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno)
  * and make a MdfdVec object for it.  Returns NULL on failure.
  */
 static MdfdVec *
-_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
+_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno,
 			  int oflags)
 {
 	MdfdVec    *v;
@@ -1384,7 +1438,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
  * EXTENSION_CREATE case.
  */
 static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
+_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			 bool skipFsync, int behavior)
 {
 	MdfdVec    *v;
@@ -1458,7 +1512,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 				char	   *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
 													 MCXT_ALLOC_ZERO);
 
-				mdextend(reln, forknum,
+				mdextend((SMgrRelation) reln, forknum,
 						 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
 						 zerobuf, skipFsync);
 				pfree(zerobuf);
@@ -1515,7 +1569,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
  * Get number of blocks present in a single disk file
  */
 static BlockNumber
-_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
+_mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 {
 	off_t		len;
 
@@ -1538,7 +1592,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
 int
 mdsyncfiletag(const FileTag *ftag, char *path)
 {
-	SMgrRelation reln = smgropen(ftag->rlocator, InvalidBackendId);
+	MdSMgrRelation reln = (MdSMgrRelation) smgropen(ftag->rlocator, InvalidBackendId);
 	File		file;
 	instr_time	io_start;
 	bool		need_to_close;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index f76c4605db..d37202609f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -19,77 +19,23 @@
 
 #include "access/xlogutils.h"
 #include "lib/ilist.h"
+#include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/md.h"
 #include "storage/smgr.h"
+#include "port/atomics.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
+#include "utils/memutils.h"
 
 
-/*
- * This struct of function pointers defines the API between smgr.c and
- * any individual storage manager module.  Note that smgr subfunctions are
- * generally expected to report problems via elog(ERROR).  An exception is
- * that smgr_unlink should use elog(WARNING), rather than erroring out,
- * because we normally unlink relations during post-commit/abort cleanup,
- * and so it's too late to raise an error.  Also, various conditions that
- * would normally be errors should be allowed during bootstrap and/or WAL
- * recovery --- see comments in md.c for details.
- */
-typedef struct f_smgr
-{
-	void		(*smgr_init) (void);	/* may be NULL */
-	void		(*smgr_shutdown) (void);	/* may be NULL */
-	void		(*smgr_open) (SMgrRelation reln);
-	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
-								bool isRedo);
-	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
-								bool isRedo);
-	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
-								BlockNumber blocknum, const void *buffer, bool skipFsync);
-	void		(*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
-									BlockNumber blocknum, int nblocks, bool skipFsync);
-	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
-								  BlockNumber blocknum);
-	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
-							  BlockNumber blocknum, void *buffer);
-	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
-							   BlockNumber blocknum, const void *buffer, bool skipFsync);
-	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
-								   BlockNumber blocknum, BlockNumber nblocks);
-	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
-	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
-								  BlockNumber nblocks);
-	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
-} f_smgr;
-
-static const f_smgr smgrsw[] = {
-	/* magnetic disk */
-	{
-		.smgr_init = mdinit,
-		.smgr_shutdown = NULL,
-		.smgr_open = mdopen,
-		.smgr_close = mdclose,
-		.smgr_create = mdcreate,
-		.smgr_exists = mdexists,
-		.smgr_unlink = mdunlink,
-		.smgr_extend = mdextend,
-		.smgr_zeroextend = mdzeroextend,
-		.smgr_prefetch = mdprefetch,
-		.smgr_read = mdread,
-		.smgr_write = mdwrite,
-		.smgr_writeback = mdwriteback,
-		.smgr_nblocks = mdnblocks,
-		.smgr_truncate = mdtruncate,
-		.smgr_immedsync = mdimmedsync,
-	}
-};
+static f_smgr *smgrsw;
 
-static const int NSmgr = lengthof(smgrsw);
+static int NSmgr = 0;
+
+static Size LargestSMgrRelationSize = 0;
 
 /*
  * Each backend has a hashtable that stores all extant SMgrRelation objects.
@@ -102,6 +48,57 @@ static dlist_head unowned_relns;
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
 
+/*
+ * smgr_register() -- add a storage manager to the smgrsw table.
+ *
+ * Returns the new manager's index into smgrsw.  smgrrelation_size is the
+ * size of its SMgrRelationData-derived struct; the largest size registered
+ * determines the entry size of the SMgrRelation hash table.
+ */
+SMgrId
+smgr_register(const f_smgr *smgr, Size smgrrelation_size)
+{
+	SMgrId		my_id;
+
+	if (process_shared_preload_libraries_done)
+		elog(FATAL, "SMgrs must be registered in the shared_preload_libraries phase");
+	if (NSmgr >= MaxSMgrId)
+		elog(FATAL, "Too many smgrs registered");
+	if (smgr->name == NULL || *smgr->name == 0)
+		elog(FATAL, "smgr registered with invalid name");
+	if (smgrrelation_size < sizeof(SMgrRelationData))
+		elog(FATAL, "smgr relation size is smaller than SMgrRelationData");
+
+	/* smgr_init and smgr_shutdown may be NULL; the rest are mandatory */
+	Assert(smgr->smgr_open != NULL);
+	Assert(smgr->smgr_close != NULL);
+	Assert(smgr->smgr_create != NULL);
+	Assert(smgr->smgr_exists != NULL);
+	Assert(smgr->smgr_unlink != NULL);
+	Assert(smgr->smgr_extend != NULL);
+	Assert(smgr->smgr_zeroextend != NULL);
+	Assert(smgr->smgr_prefetch != NULL);
+	Assert(smgr->smgr_read != NULL);
+	Assert(smgr->smgr_write != NULL);
+	Assert(smgr->smgr_writeback != NULL);
+	Assert(smgr->smgr_nblocks != NULL);
+	Assert(smgr->smgr_truncate != NULL);
+	Assert(smgr->smgr_immedsync != NULL);
+
+	/*
+	 * Grow the array before counting: allocation errors out, never returns NULL.
+	 */
+	if (smgrsw == NULL)
+		smgrsw = MemoryContextAlloc(TopMemoryContext, sizeof(f_smgr));
+	else
+		smgrsw = repalloc(smgrsw, sizeof(f_smgr) * (NSmgr + 1));
+
+	my_id = NSmgr++;
+	memcpy(&smgrsw[my_id], smgr, sizeof(f_smgr));
+	LargestSMgrRelationSize = Max(LargestSMgrRelationSize, smgrrelation_size);
+
+	return my_id;
+}
 
 /*
  * smgrinit(), smgrshutdown() -- Initialize or shut down storage
@@ -157,9 +154,11 @@ smgropen(RelFileLocator rlocator, BackendId backend)
 	{
 		/* First time through: initialize the hash table */
 		HASHCTL		ctl;
+		LargestSMgrRelationSize = MAXALIGN(LargestSMgrRelationSize);
+		Assert(NSmgr > 0);
 
 		ctl.keysize = sizeof(RelFileLocatorBackend);
-		ctl.entrysize = sizeof(SMgrRelationData);
+		ctl.entrysize = LargestSMgrRelationSize;
 		SMgrRelationHash = hash_create("smgr relation table", 400,
 									   &ctl, HASH_ELEM | HASH_BLOBS);
 		dlist_init(&unowned_relns);
@@ -180,7 +179,8 @@ smgropen(RelFileLocator rlocator, BackendId backend)
 		reln->smgr_targblock = InvalidBlockNumber;
 		for (int i = 0; i <= MAX_FORKNUM; ++i)
 			reln->smgr_cached_nblocks[i] = InvalidBlockNumber;
-		reln->smgr_which = 0;	/* we only have md.c at present */
+
+		reln->smgr_which = MdSMgrId;	/* we only have md.c at present */
 
 		/* implementation-specific initialization */
 		smgrsw[reln->smgr_which].smgr_open(reln);
diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c
index a604432126..dab4be80c9 100644
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -42,6 +42,7 @@
 #include "postmaster/postmaster.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
+#include "storage/md.h"
 #include "storage/latch.h"
 #include "storage/pg_shmem.h"
 #include "storage/pmsignal.h"
@@ -199,6 +200,9 @@ InitStandaloneProcess(const char *argv0)
 	InitProcessLocalLatch();
 	InitializeLatchWaitSet();
 
+	/* Initialize smgrs */
+	register_builtin_dynamic_managers();
+
 	/*
 	 * For consistency with InitPostmasterChild, initialize signal mask here.
 	 * But we don't unblock SIGQUIT or provide a default handler for it.
@@ -1868,6 +1872,14 @@ process_session_preload_libraries(void)
 				   true);
 }
 
+/* Register the built-in managers that are set up through registration calls. */
+void
+register_builtin_dynamic_managers(void)
+{
+	/* md.c storage manager */
+	mdsmgr_register();
+}
+
 /*
  * process any shared memory requests from preloaded libraries
  */
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 14bd574fc2..8f53b6351c 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -488,6 +488,7 @@ extern void TouchSocketLockFiles(void);
 extern void AddToDataDirLockFile(int target_line, const char *str);
 extern bool RecheckDataDirLockFile(void);
 extern void ValidatePgVersion(const char *path);
+extern void register_builtin_dynamic_managers(void);
 extern void process_shared_preload_libraries(void);
 extern void process_session_preload_libraries(void);
 extern void process_shmem_requests(void);
diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index 941879ee6a..beeddfd373 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -19,6 +19,10 @@
 #include "storage/smgr.h"
 #include "storage/sync.h"
 
+/* registration function for md storage manager */
+extern void mdsmgr_register(void);
+extern SMgrId MdSMgrId;
+
 /* md storage manager functionality */
 extern void mdinit(void);
 extern void mdopen(SMgrRelation reln);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index a9a179aaba..5ad1d50e0c 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -18,6 +18,10 @@
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
 
+typedef uint8 SMgrId;
+
+#define MaxSMgrId UINT8_MAX
+
 /*
  * smgr.c maintains a table of SMgrRelation objects, which are essentially
  * cached file handles.  An SMgrRelation is created (if not already present)
@@ -59,14 +63,8 @@ typedef struct SMgrRelationData
 	 * Fields below here are intended to be private to smgr.c and its
 	 * submodules.  Do not touch them from elsewhere.
 	 */
-	int			smgr_which;		/* storage manager selector */
-
-	/*
-	 * for md.c; per-fork arrays of the number of open segments
-	 * (md_num_open_segs) and the segments themselves (md_seg_fds).
-	 */
-	int			md_num_open_segs[MAX_FORKNUM + 1];
-	struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
+	SMgrId		smgr_which;		/* storage manager selector */
+	int			smgrrelation_size;	/* size of this struct, incl. smgr-specific data */
 
 	/* if unowned, list link in list of all unowned SMgrRelations */
 	dlist_node	node;
@@ -77,6 +75,48 @@ typedef SMgrRelationData *SMgrRelation;
 #define SmgrIsTemp(smgr) \
 	RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator)
 
+/*
+ * This struct of function pointers defines the API between smgr.c and
+ * any individual storage manager module.  Note that smgr subfunctions are
+ * generally expected to report problems via elog(ERROR).  An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error.  Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
+ */
+typedef struct f_smgr
+{
+	const char *name;			/* smgr name; smgr_register rejects NULL or "" */
+	void		(*smgr_init) (void);		/* may be NULL */
+	void		(*smgr_shutdown) (void);	/* may be NULL */
+	void		(*smgr_open) (SMgrRelation reln);
+	void		(*smgr_close) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_create) (SMgrRelation reln, ForkNumber forknum,
+								bool isRedo);
+	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
+								bool isRedo);
+	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
+								BlockNumber blocknum, const void *buffer, bool skipFsync);
+	void		(*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
+									BlockNumber blocknum, int nblocks, bool skipFsync);
+	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+								  BlockNumber blocknum);
+	void		(*smgr_read) (SMgrRelation reln, ForkNumber forknum,
+							  BlockNumber blocknum, void *buffer);
+	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
+							   BlockNumber blocknum, const void *buffer, bool skipFsync);
+	void		(*smgr_writeback) (SMgrRelation reln, ForkNumber forknum,
+								   BlockNumber blocknum, BlockNumber nblocks);
+	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
+	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
+								  BlockNumber nblocks);
+	void		(*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
+} f_smgr;
+
+extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size);
+
 extern void smgrinit(void);
 extern SMgrRelation smgropen(RelFileLocator rlocator, BackendId backend);
 extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
-- 
2.39.0

