Rebased.  I had intended to try to get this into v17, but a couple of
unresolved problems came up while rebasing over the new incremental
backup stuff.  You snooze, you lose.  Hopefully we can sort these out
in time for the next commitfest:

* should pg_combinebasebackup read the control file to fetch the segment size?
* hunt for other segment-size related problems that may be lurking in
new incremental backup stuff
* basebackup_incremental.c wants to use memory in proportion to
segment size, which looks like a problem, and I wrote about that in a
new thread[1]

[1] 
https://www.postgresql.org/message-id/flat/CA%2BhUKG%2B2hZ0sBztPW4mkLfng0qfkNtAHFUfxOMLizJ0BPmi5%2Bg%40mail.gmail.com
From 85678257fef94aa3ca3efb39ce55fb66df7c889e Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Fri, 26 May 2023 01:41:11 +1200
Subject: [PATCH v3] Allow relation segment size to be set by initdb.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously, relation segment size was a rarely modified compile time
option.  Make it an initdb option, so that users with very large tables
can avoid using so many files and file descriptors.

The initdb option --rel-segsize is modeled on the existing --wal-segsize
option.

The data type used to store the size is int64, not BlockNumber, because
it seems reasonable to want to be able to say --rel-segsize=32TB (=
don't use segments at all), but that would overflow uint32.

It should be fairly straightforward to teach pg_upgrade (or some new
dedicated tool) to convert an existing cluster to a new segment size,
but that is not done yet, so for now this is only useful for entirely
new clusters.

The default behavior is unchanged: 1GB segments.  On Windows, we can't
go above 2GB for now (we'd have to make a lot of changes due to
Windows' small off_t).

XXX work remains to be done for incremental backups

Reviewed-by: David Steele <david@pgmasters.net>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Reviewed-by: Stephen Frost <sfrost@snowman.net>
Reviewed-by: Jim Mlodgenski <jimmy76@gmail.com>
Reviewed-by: Dagfinn Ilmari Mannsåker <ilmari@ilmari.org>
Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKG%2BBGXwMbrvzXAjL8VMGf25y_ga_XnO741g10y0%3Dm6dDiA%40mail.gmail.com
---
 configure                                   |  91 --------------
 configure.ac                                |  55 ---------
 doc/src/sgml/config.sgml                    |   7 +-
 doc/src/sgml/ref/initdb.sgml                |  24 ++++
 meson.build                                 |  14 ---
 src/backend/access/transam/xlog.c           |  11 +-
 src/backend/backup/basebackup.c             |   7 +-
 src/backend/backup/basebackup_incremental.c |  31 +++--
 src/backend/bootstrap/bootstrap.c           |   5 +-
 src/backend/storage/file/buffile.c          |   6 +-
 src/backend/storage/smgr/md.c               | 128 ++++++++++++--------
 src/backend/storage/smgr/smgr.c             |  14 +++
 src/backend/utils/misc/guc.c                |  16 +++
 src/backend/utils/misc/guc_tables.c         |  12 +-
 src/bin/initdb/initdb.c                     |  47 ++++++-
 src/bin/pg_checksums/pg_checksums.c         |   2 +-
 src/bin/pg_combinebackup/reconstruct.c      |  18 ++-
 src/bin/pg_controldata/pg_controldata.c     |   2 +-
 src/bin/pg_resetwal/pg_resetwal.c           |   4 +-
 src/bin/pg_rewind/filemap.c                 |   4 +-
 src/bin/pg_rewind/pg_rewind.c               |   3 +
 src/bin/pg_rewind/pg_rewind.h               |   1 +
 src/bin/pg_upgrade/relfilenumber.c          |   2 +-
 src/include/catalog/pg_control.h            |   2 +-
 src/include/pg_config.h.in                  |  13 --
 src/include/storage/smgr.h                  |   3 +
 src/include/utils/guc_tables.h              |   1 +
 27 files changed, 249 insertions(+), 274 deletions(-)

diff --git a/configure b/configure
index 36feeafbb23..49a7f0f2c4a 100755
--- a/configure
+++ b/configure
@@ -842,8 +842,6 @@ enable_dtrace
 enable_tap_tests
 enable_injection_points
 with_blocksize
-with_segsize
-with_segsize_blocks
 with_wal_blocksize
 with_llvm
 enable_depend
@@ -1551,9 +1549,6 @@ Optional Packages:
   --with-pgport=PORTNUM   set default port number [5432]
   --with-blocksize=BLOCKSIZE
                           set table block size in kB [8]
-  --with-segsize=SEGSIZE  set table segment size in GB [1]
-  --with-segsize-blocks=SEGSIZE_BLOCKS
-                          set table segment size in blocks [0]
   --with-wal-blocksize=BLOCKSIZE
                           set WAL block size in kB [8]
   --with-llvm             build with LLVM based JIT support
@@ -3759,85 +3754,6 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
-#
-# Relation segment size
-#
-
-
-
-# Check whether --with-segsize was given.
-if test "${with_segsize+set}" = set; then :
-  withval=$with_segsize;
-  case $withval in
-    yes)
-      as_fn_error $? "argument required for --with-segsize option" "$LINENO" 5
-      ;;
-    no)
-      as_fn_error $? "argument required for --with-segsize option" "$LINENO" 5
-      ;;
-    *)
-      segsize=$withval
-      ;;
-  esac
-
-else
-  segsize=1
-fi
-
-
-
-
-
-# Check whether --with-segsize-blocks was given.
-if test "${with_segsize_blocks+set}" = set; then :
-  withval=$with_segsize_blocks;
-  case $withval in
-    yes)
-      as_fn_error $? "argument required for --with-segsize-blocks option" "$LINENO" 5
-      ;;
-    no)
-      as_fn_error $? "argument required for --with-segsize-blocks option" "$LINENO" 5
-      ;;
-    *)
-      segsize_blocks=$withval
-      ;;
-  esac
-
-else
-  segsize_blocks=0
-fi
-
-
-
-# If --with-segsize-blocks is non-zero, it is used, --with-segsize
-# otherwise. segsize-blocks is only really useful for developers wanting to
-# test segment related code. Warn if both are used.
-if test $segsize_blocks -ne 0 -a $segsize -ne 1; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: both --with-segsize and --with-segsize-blocks specified, --with-segsize-blocks wins" >&5
-$as_echo "$as_me: WARNING: both --with-segsize and --with-segsize-blocks specified, --with-segsize-blocks wins" >&2;}
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for segment size" >&5
-$as_echo_n "checking for segment size... " >&6; }
-if test $segsize_blocks -eq 0; then
-  # this expression is set up to avoid unnecessary integer overflow
-  # blocksize is already guaranteed to be a factor of 1024
-  RELSEG_SIZE=`expr '(' 1024 / ${blocksize} ')' '*' ${segsize} '*' 1024`
-  test $? -eq 0 || exit 1
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${segsize}GB" >&5
-$as_echo "${segsize}GB" >&6; }
-else
-  RELSEG_SIZE=$segsize_blocks
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${RELSEG_SIZE} blocks" >&5
-$as_echo "${RELSEG_SIZE} blocks" >&6; }
-fi
-
-
-cat >>confdefs.h <<_ACEOF
-#define RELSEG_SIZE ${RELSEG_SIZE}
-_ACEOF
-
-
 #
 # WAL block size
 #
@@ -15107,13 +15023,6 @@ _ACEOF
 
 
 
-# If we don't have largefile support, can't handle segment size >= 2GB.
-if test "$ac_cv_sizeof_off_t" -lt 8; then
-  if expr $RELSEG_SIZE '*' $blocksize '>=' 2 '*' 1024 '*' 1024; then
-    as_fn_error $? "Large file support is not enabled. Segment size cannot be larger than 1GB." "$LINENO" 5
-  fi
-fi
-
 # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
 # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
diff --git a/configure.ac b/configure.ac
index 57f734879e1..a04716aebf5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -288,54 +288,6 @@ AC_DEFINE_UNQUOTED([BLCKSZ], ${BLCKSZ}, [
  Changing BLCKSZ requires an initdb.
 ])
 
-#
-# Relation segment size
-#
-PGAC_ARG_REQ(with, segsize, [SEGSIZE], [set table segment size in GB [1]],
-             [segsize=$withval],
-             [segsize=1])
-PGAC_ARG_REQ(with, segsize-blocks, [SEGSIZE_BLOCKS], [set table segment size in blocks [0]],
-             [segsize_blocks=$withval],
-             [segsize_blocks=0])
-
-# If --with-segsize-blocks is non-zero, it is used, --with-segsize
-# otherwise. segsize-blocks is only really useful for developers wanting to
-# test segment related code. Warn if both are used.
-if test $segsize_blocks -ne 0 -a $segsize -ne 1; then
-  AC_MSG_WARN([both --with-segsize and --with-segsize-blocks specified, --with-segsize-blocks wins])
-fi
-
-AC_MSG_CHECKING([for segment size])
-if test $segsize_blocks -eq 0; then
-  # this expression is set up to avoid unnecessary integer overflow
-  # blocksize is already guaranteed to be a factor of 1024
-  RELSEG_SIZE=`expr '(' 1024 / ${blocksize} ')' '*' ${segsize} '*' 1024`
-  test $? -eq 0 || exit 1
-  AC_MSG_RESULT([${segsize}GB])
-else
-  RELSEG_SIZE=$segsize_blocks
-  AC_MSG_RESULT([${RELSEG_SIZE} blocks])
-fi
-
-AC_DEFINE_UNQUOTED([RELSEG_SIZE], ${RELSEG_SIZE}, [
- RELSEG_SIZE is the maximum number of blocks allowed in one disk file.
- Thus, the maximum size of a single file is RELSEG_SIZE * BLCKSZ;
- relations bigger than that are divided into multiple files.
-
- RELSEG_SIZE * BLCKSZ must be less than your OS' limit on file size.
- This is often 2 GB or 4GB in a 32-bit operating system, unless you
- have large file support enabled.  By default, we make the limit 1 GB
- to avoid any possible integer-overflow problems within the OS.
- A limit smaller than necessary only means we divide a large
- relation into more chunks than necessary, so it seems best to err
- in the direction of a small limit.
-
- A power-of-2 value is recommended to save a few cycles in md.c,
- but is not absolutely required.
-
- Changing RELSEG_SIZE requires an initdb.
-])
-
 #
 # WAL block size
 #
@@ -1712,13 +1664,6 @@ fi
 dnl Check for largefile support (must be after AC_SYS_LARGEFILE)
 AC_CHECK_SIZEOF([off_t])
 
-# If we don't have largefile support, can't handle segment size >= 2GB.
-if test "$ac_cv_sizeof_off_t" -lt 8; then
-  if expr $RELSEG_SIZE '*' $blocksize '>=' 2 '*' 1024 '*' 1024; then
-    AC_MSG_ERROR([Large file support is not enabled. Segment size cannot be larger than 1GB.])
-  fi
-fi
-
 AC_CHECK_SIZEOF([bool], [],
 [#ifdef HAVE_STDBOOL_H
 #include <stdbool.h>
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b38cbd714aa..e7638e3d3f4 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -11040,10 +11040,9 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       <listitem>
        <para>
         Reports the number of blocks (pages) that can be stored within a file
-        segment.  It is determined by the value of <literal>RELSEG_SIZE</literal>
-        when building the server.  The maximum size of a segment file in bytes
-        is equal to <varname>segment_size</varname> multiplied by
-        <varname>block_size</varname>; by default this is 1GB.
+        segment.  It is changeable with the <literal>--rel-segsize</literal> option
+        when a cluster is initialized with <application>initdb</application>.
+        By default this is 1GB.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index cd75cae10e2..db1ed95694c 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -470,6 +470,30 @@ PostgreSQL documentation
        </para>
       </listitem>
      </varlistentry>
+
+     <varlistentry id="app-initdb-option-rel-segsize">
+      <term><option>--rel-segsize=<replaceable>size</replaceable></option></term>
+      <listitem>
+       <para>
+        Set the maximum size of relation segment files.  The size must have a suffix
+        <literal>kB</literal>, <literal>MB</literal>, <literal>GB</literal> or
+        <literal>TB</literal>.  The default size is 1GB, which was chosen to
+        support large relations on operating systems without large file support.
+        This option can only be set during initialization, and cannot be
+        changed later.
+       </para>
+
+       <para>
+        Setting this to a value higher than the default reduces the
+        number of file descriptors that must be managed while accessing very large
+        tables.  Note that values higher than the file system can support may
+        result in errors while trying to extend a table (for example Linux ext4
+        limits files to 16TB), and values above 2GB are not supported on
+        operating systems without a large <literal>off_t</literal> data type
+        (currently Windows).
+       </para>
+      </listitem>
+     </varlistentry>
     </variablelist>
    </para>
 
diff --git a/meson.build b/meson.build
index 85788f9dd8f..551b46a9831 100644
--- a/meson.build
+++ b/meson.build
@@ -420,16 +420,6 @@ cdata.set('USE_INJECTION_POINTS', get_option('injection_points') ? 1 : false)
 
 blocksize = get_option('blocksize').to_int() * 1024
 
-if get_option('segsize_blocks') != 0
-  if get_option('segsize') != 1
-    warning('both segsize and segsize_blocks specified, segsize_blocks wins')
-  endif
-
-  segsize = get_option('segsize_blocks')
-else
-  segsize = (get_option('segsize') * 1024 * 1024 * 1024) / blocksize
-endif
-
 cdata.set('BLCKSZ', blocksize, description:
 '''Size of a disk block --- this also limits the size of a tuple. You can set
    it bigger if you need bigger tuples (although TOAST should reduce the need
@@ -440,7 +430,6 @@ cdata.set('BLCKSZ', blocksize, description:
    Changing BLCKSZ requires an initdb.''')
 
 cdata.set('XLOG_BLCKSZ', get_option('wal_blocksize').to_int() * 1024)
-cdata.set('RELSEG_SIZE', segsize)
 cdata.set('DEF_PGPORT', get_option('pgport'))
 cdata.set_quoted('DEF_PGPORT_STR', get_option('pgport').to_string())
 cdata.set_quoted('PG_KRB_SRVNAM', get_option('krb_srvnam'))
@@ -3359,9 +3348,6 @@ if meson.version().version_compare('>=0.57')
     {
       'data block size': '@0@ kB'.format(cdata.get('BLCKSZ') / 1024),
       'WAL block size': '@0@ kB'.format(cdata.get('XLOG_BLCKSZ') / 1024),
-      'segment size': get_option('segsize_blocks') != 0 ?
-        '@0@ blocks'.format(cdata.get('RELSEG_SIZE')) :
-        '@0@ GB'.format(get_option('segsize')),
     },
     section: 'Data layout',
   )
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 20a5f862090..1c705c3469a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4178,7 +4178,7 @@ WriteControlFile(void)
 	ControlFile->floatFormat = FLOATFORMAT_VALUE;
 
 	ControlFile->blcksz = BLCKSZ;
-	ControlFile->relseg_size = RELSEG_SIZE;
+	ControlFile->relseg_size = rel_segment_size;
 	ControlFile->xlog_blcksz = XLOG_BLCKSZ;
 	ControlFile->xlog_seg_size = wal_segment_size;
 
@@ -4348,13 +4348,6 @@ ReadControlFile(void)
 						   " but the server was compiled with BLCKSZ %d.",
 						   ControlFile->blcksz, BLCKSZ),
 				 errhint("It looks like you need to recompile or initdb.")));
-	if (ControlFile->relseg_size != RELSEG_SIZE)
-		ereport(FATAL,
-				(errmsg("database files are incompatible with server"),
-				 errdetail("The database cluster was initialized with RELSEG_SIZE %d,"
-						   " but the server was compiled with RELSEG_SIZE %d.",
-						   ControlFile->relseg_size, RELSEG_SIZE),
-				 errhint("It looks like you need to recompile or initdb.")));
 	if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
 		ereport(FATAL,
 				(errmsg("database files are incompatible with server"),
@@ -4436,6 +4429,8 @@ ReadControlFile(void)
 
 	CalculateCheckpointSegments();
 
+	rel_segment_size = ControlFile->relseg_size;
+
 	/* Make the initdb settings visible as GUC variables, too */
 	SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
 					PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index 5fbbe5ffd20..87e57ec2352 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -43,6 +43,7 @@
 #include "storage/dsm_impl.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
+#include "storage/smgr.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
 #include "utils/ps_status.h"
@@ -1206,7 +1207,7 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
 	 * But we don't need it at all if this is not an incremental backup.
 	 */
 	if (ib != NULL)
-		relative_block_numbers = palloc(sizeof(BlockNumber) * RELSEG_SIZE);
+		relative_block_numbers = palloc(sizeof(BlockNumber) * rel_segment_size);
 
 	/*
 	 * Determine if the current path is a database directory that can contain
@@ -1682,7 +1683,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
 			 */
 			cnt = read_file_data_into_buffer(sink, readfilename, fd,
 											 bytes_done, remaining,
-											 blkno + segno * RELSEG_SIZE,
+											 blkno + segno * rel_segment_size,
 											 verify_checksum,
 											 &checksum_failures);
 		}
@@ -1704,7 +1705,7 @@ sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
 			cnt = read_file_data_into_buffer(sink, readfilename, fd,
 											 relative_blkno * BLCKSZ,
 											 BLCKSZ,
-											 relative_blkno + segno * RELSEG_SIZE,
+											 relative_blkno + segno * rel_segment_size,
 											 verify_checksum,
 											 &checksum_failures);
 
diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c
index ebc41f28be5..274964e63a1 100644
--- a/src/backend/backup/basebackup_incremental.c
+++ b/src/backend/backup/basebackup_incremental.c
@@ -28,6 +28,7 @@
 #include "common/int.h"
 #include "datatype/timestamp.h"
 #include "postmaster/walsummarizer.h"
+#include "storage/smgr.h"
 #include "utils/timestamp.h"
 
 #define	BLOCKS_PER_READ			512
@@ -699,9 +700,9 @@ GetIncrementalFilePath(Oid dboid, Oid spcoid, RelFileNumber relfilenumber,
  * an incremental file in the backup instead of the entire file. On return,
  * *num_blocks_required will be set to the number of blocks that need to be
  * sent, and the actual block numbers will have been stored in
- * relative_block_numbers, which should be an array of at least RELSEG_SIZE.
- * In addition, *truncation_block_length will be set to the value that should
- * be included in the incremental file.
+ * relative_block_numbers, which should be an array of at least
+ * rel_segment_size.  In addition, *truncation_block_length will be set to
+ * the value that should be included in the incremental file.
  */
 FileBackupMethod
 GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
@@ -712,7 +713,7 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 					BlockNumber *relative_block_numbers,
 					unsigned *truncation_block_length)
 {
-	BlockNumber absolute_block_numbers[RELSEG_SIZE];
+	BlockNumber *absolute_block_numbers;
 	BlockNumber limit_block;
 	BlockNumber start_blkno;
 	BlockNumber stop_blkno;
@@ -735,7 +736,7 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 	 * If the file size is too large or not a multiple of BLCKSZ, then
 	 * something weird is happening, so give up and send the whole file.
 	 */
-	if ((size % BLCKSZ) != 0 || size / BLCKSZ > RELSEG_SIZE)
+	if ((size % BLCKSZ) != 0 || size / BLCKSZ > rel_segment_size)
 		return BACK_UP_FILE_FULLY;
 
 	/*
@@ -823,7 +824,7 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 	 * If the limit_block is less than or equal to the point where this
 	 * segment starts, send the whole file.
 	 */
-	if (limit_block <= segno * RELSEG_SIZE)
+	if (limit_block <= segno * rel_segment_size)
 		return BACK_UP_FILE_FULLY;
 
 	/*
@@ -832,16 +833,18 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 	 * We shouldn't overflow computing the start or stop block numbers, but if
 	 * it manages to happen somehow, detect it and throw an error.
 	 */
-	start_blkno = segno * RELSEG_SIZE;
+	start_blkno = segno * rel_segment_size;
 	stop_blkno = start_blkno + (size / BLCKSZ);
-	if (start_blkno / RELSEG_SIZE != segno || stop_blkno < start_blkno)
+	if (start_blkno / rel_segment_size != segno || stop_blkno < start_blkno)
 		ereport(ERROR,
 				errcode(ERRCODE_INTERNAL_ERROR),
 				errmsg_internal("overflow computing block number bounds for segment %u with size %zu",
 								segno, size));
+	absolute_block_numbers = palloc(sizeof(BlockNumber) * rel_segment_size);
 	nblocks = BlockRefTableEntryGetBlocks(brtentry, start_blkno, stop_blkno,
-										  absolute_block_numbers, RELSEG_SIZE);
-	Assert(nblocks <= RELSEG_SIZE);
+										  absolute_block_numbers,
+										  rel_segment_size);
+	Assert(nblocks <= rel_segment_size);
 
 	/*
 	 * If we're going to have to send nearly all of the blocks, then just send
@@ -856,7 +859,10 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 	 * nothing good about sending an incremental file in that case.
 	 */
 	if (nblocks * BLCKSZ > size * 0.9)
+	{
+		pfree(absolute_block_numbers);
 		return BACK_UP_FILE_FULLY;
+	}
 
 	/*
 	 * Looks like we can send an incremental file, so sort the absolute the
@@ -872,6 +878,7 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 		  compare_block_numbers);
 	for (i = 0; i < nblocks; ++i)
 		relative_block_numbers[i] = absolute_block_numbers[i] - start_blkno;
+	pfree(absolute_block_numbers);
 	*num_blocks_required = nblocks;
 
 	/*
@@ -885,7 +892,7 @@ GetFileBackupMethod(IncrementalBackupInfo *ib, const char *path,
 	*truncation_block_length = size / BLCKSZ;
 	if (BlockNumberIsValid(limit_block))
 	{
-		unsigned	relative_limit = limit_block - segno * RELSEG_SIZE;
+		unsigned	relative_limit = limit_block - segno * rel_segment_size;
 
 		if (*truncation_block_length < relative_limit)
 			*truncation_block_length = relative_limit;
@@ -904,7 +911,7 @@ GetIncrementalFileSize(unsigned num_blocks_required)
 	size_t		result;
 
 	/* Make sure we're not going to overflow. */
-	Assert(num_blocks_required <= RELSEG_SIZE);
+	Assert(num_blocks_required <= rel_segment_size);
 
 	/*
 	 * Three four byte quantities (magic number, truncation block length,
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 986f6f1d9ca..880e913ce3c 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -217,7 +217,7 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 	argv++;
 	argc--;
 
-	while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:-:")) != -1)
+	while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:R:-:")) != -1)
 	{
 		switch (flag)
 		{
@@ -275,6 +275,9 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 			case 'r':
 				strlcpy(OutputFileName, optarg, MAXPGPATH);
 				break;
+			case 'R':
+				rel_segment_size = strtoi64(optarg, NULL, 0);
+				break;
 			case 'X':
 				SetConfigOption("wal_segment_size", optarg, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT);
 				break;
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index a263875fd5a..e0b9fed9b7b 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -55,9 +55,9 @@
 #include "utils/resowner.h"
 
 /*
- * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
- * The reason is that we'd like large BufFiles to be spread across multiple
- * tablespaces when available.
+ * We break BufFiles into gigabyte-sized segments, regardless of
+ * rel_segment_size.  The reason is that we'd like large BufFiles to be spread
+ * across multiple tablespaces when available.
  */
 #define MAX_PHYSICAL_FILESIZE	0x40000000
 #define BUFFILE_SEG_SIZE		(MAX_PHYSICAL_FILESIZE / BLCKSZ)
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index bf0f3ca76d1..b95a15b8599 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -31,6 +31,7 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
+#include "port/pg_bitutils.h"
 #include "storage/bufmgr.h"
 #include "storage/fd.h"
 #include "storage/md.h"
@@ -43,15 +44,15 @@
  * The magnetic disk storage manager keeps track of open file
  * descriptors in its own descriptor pool.  This is done to make it
  * easier to support relations that are larger than the operating
- * system's file size limit (often 2GBytes).  In order to do that,
- * we break relations up into "segment" files that are each shorter than
- * the OS file size limit.  The segment size is set by the RELSEG_SIZE
- * configuration constant in pg_config.h.
+ * system's file size limit (historically 2GB, sometimes much larger but still
+ * smaller than the maximum possible relation size).  In order to do that, we
+ * break relations up into "segment" files of a user-specified size chosen at
+ * initdb time and accessed as rel_segment_size.
  *
  * On disk, a relation must consist of consecutively numbered segment
  * files in the pattern
- *	-- Zero or more full segments of exactly RELSEG_SIZE blocks each
- *	-- Exactly one partial segment of size 0 <= size < RELSEG_SIZE blocks
+ *	-- Zero or more full segments of exactly rel_segment_size blocks each
+ *	-- Exactly one partial segment of size 0 <= size < rel_segment_size blocks
  *	-- Optionally, any number of inactive segments of size 0 blocks.
  * The full and partial segments are collectively the "active" segments.
  * Inactive segments are those that once contained data but are currently
@@ -108,7 +109,7 @@ static MemoryContext MdCxt;		/* context for all MdfdVec objects */
 #define EXTENSION_CREATE_RECOVERY	(1 << 3)
 /*
  * Allow opening segments which are preceded by segments smaller than
- * RELSEG_SIZE, e.g. inactive segments (see above). Note that this breaks
+ * rel_segment_size, e.g. inactive segments (see above). Note that this breaks
  * mdnblocks() and related functionality henceforth - which currently is ok,
  * because this is only required in the checkpointer which never uses
  * mdnblocks().
@@ -140,6 +141,31 @@ static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum,
 static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
 							  MdfdVec *seg);
 
+/* Given a block number, which segment is it in? */
+static inline uint32
+blockno_to_segno(BlockNumber blockno)
+{
+	/* Because it's a power of two, we can use a shift instead of "/". */
+	Assert(pg_popcount64(rel_segment_size) == 1);
+	return (uint64) blockno >> pg_leftmost_one_pos64(rel_segment_size);
+}
+
+/* Given a block number, which block is that within its segment? */
+static inline BlockNumber
+blockno_within_segment(BlockNumber blockno)
+{
+	/* Because it's a power of two, we can use a mask instead of "%". */
+	Assert(pg_popcount64(rel_segment_size) == 1);
+	return blockno & (rel_segment_size - 1);
+}
+
+/* Given a block number, convert it to byte offset within a segment. */
+static inline off_t
+blockno_to_seekpos(BlockNumber blockno)
+{
+	return blockno_within_segment(blockno) * (off_t) BLCKSZ;
+}
+
 static inline int
 _mdfd_open_flags(void)
 {
@@ -488,9 +514,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
 
-	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	seekpos = blockno_to_seekpos(blocknum);
 
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+	Assert(seekpos < (off_t) BLCKSZ * rel_segment_size);
 
 	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
 	{
@@ -512,7 +538,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	if (!skipFsync && !SmgrIsTemp(reln))
 		register_dirty_segment(reln, forknum, v);
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, forknum, v) <= rel_segment_size);
 }
 
 /*
@@ -550,19 +576,19 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 
 	while (remblocks > 0)
 	{
-		BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
-		off_t		seekpos = (off_t) BLCKSZ * segstartblock;
+		BlockNumber segstartblock = blockno_within_segment(curblocknum);
+		off_t		seekpos = blockno_to_seekpos(curblocknum);
 		int			numblocks;
 
-		if (segstartblock + remblocks > RELSEG_SIZE)
-			numblocks = RELSEG_SIZE - segstartblock;
+		if (segstartblock + remblocks > rel_segment_size)
+			numblocks = rel_segment_size - segstartblock;
 		else
 			numblocks = remblocks;
 
 		v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
 
-		Assert(segstartblock < RELSEG_SIZE);
-		Assert(segstartblock + numblocks <= RELSEG_SIZE);
+		Assert(segstartblock < rel_segment_size);
+		Assert(segstartblock + numblocks <= rel_segment_size);
 
 		/*
 		 * If available and useful, use posix_fallocate() (via
@@ -616,7 +642,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 		if (!skipFsync && !SmgrIsTemp(reln))
 			register_dirty_segment(reln, forknum, v);
 
-		Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+		Assert(_mdnblocks(reln, forknum, v) <= rel_segment_size);
 
 		remblocks -= numblocks;
 		curblocknum += numblocks;
@@ -668,7 +694,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior)
 	mdfd->mdfd_vfd = fd;
 	mdfd->mdfd_segno = 0;
 
-	Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, forknum, mdfd) <= rel_segment_size);
 
 	return mdfd;
 }
@@ -732,13 +758,13 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		if (v == NULL)
 			return false;
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = blockno_to_seekpos(blocknum);
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (off_t) BLCKSZ * rel_segment_size);
 
 		nblocks_this_segment =
 			Min(nblocks,
-				RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
+				rel_segment_size - blockno_within_segment(blocknum));
 
 		(void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ * nblocks_this_segment,
 							WAIT_EVENT_DATA_FILE_PREFETCH);
@@ -824,13 +850,13 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		v = _mdfd_getseg(reln, forknum, blocknum, false,
 						 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = blockno_to_seekpos(blocknum);
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (off_t) BLCKSZ * rel_segment_size);
 
 		nblocks_this_segment =
 			Min(nblocks,
-				RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
+				rel_segment_size - blockno_within_segment(blocknum));
 		nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
 
 		iovcnt = buffers_to_iovec(iov, buffers, nblocks_this_segment);
@@ -947,13 +973,13 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
 						 EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = blockno_to_seekpos(blocknum);
 
-		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		Assert(seekpos < (off_t) BLCKSZ * rel_segment_size);
 
 		nblocks_this_segment =
 			Min(nblocks,
-				RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE)));
+				rel_segment_size - blockno_within_segment(blocknum));
 		nblocks_this_segment = Min(nblocks_this_segment, lengthof(iov));
 
 		iovcnt = buffers_to_iovec(iov, (void **) buffers, nblocks_this_segment);
@@ -1058,17 +1084,17 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
 			return;
 
 		/* compute offset inside the current segment */
-		segnum_start = blocknum / RELSEG_SIZE;
+		segnum_start = blockno_to_segno(blocknum);
 
 		/* compute number of desired writes within the current segment */
-		segnum_end = (blocknum + nblocks - 1) / RELSEG_SIZE;
+		segnum_end = blockno_to_segno(blocknum + nblocks - 1);
 		if (segnum_start != segnum_end)
-			nflush = RELSEG_SIZE - (blocknum % ((BlockNumber) RELSEG_SIZE));
+			nflush = rel_segment_size - blockno_within_segment(blocknum);
 
 		Assert(nflush >= 1);
 		Assert(nflush <= nblocks);
 
-		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+		seekpos = blockno_to_seekpos(blocknum);
 
 		FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
 
@@ -1099,8 +1125,8 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 
 	/*
 	 * Start from the last open segments, to avoid redundant seeks.  We have
-	 * previously verified that these segments are exactly RELSEG_SIZE long,
-	 * and it's useless to recheck that each time.
+	 * previously verified that these segments are exactly rel_segment_size
+	 * long, and it's useless to recheck that each time.
 	 *
 	 * NOTE: this assumption could only be wrong if another backend has
 	 * truncated the relation.  We rely on higher code levels to handle that
@@ -1116,13 +1142,13 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 	for (;;)
 	{
 		nblocks = _mdnblocks(reln, forknum, v);
-		if (nblocks > ((BlockNumber) RELSEG_SIZE))
+		if (nblocks > rel_segment_size)
 			elog(FATAL, "segment too big");
-		if (nblocks < ((BlockNumber) RELSEG_SIZE))
-			return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
+		if (nblocks < rel_segment_size)
+			return (segno * rel_segment_size) + nblocks;
 
 		/*
-		 * If segment is exactly RELSEG_SIZE, advance to next one.
+		 * If segment is exactly rel_segment_size, advance to next one.
 		 */
 		segno++;
 
@@ -1135,7 +1161,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum)
 		 */
 		v = _mdfd_openseg(reln, forknum, segno, 0);
 		if (v == NULL)
-			return segno * ((BlockNumber) RELSEG_SIZE);
+			return segno * rel_segment_size;
 	}
 }
 
@@ -1176,7 +1202,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 	{
 		MdfdVec    *v;
 
-		priorblocks = (curopensegs - 1) * RELSEG_SIZE;
+		priorblocks = (curopensegs - 1) * rel_segment_size;
 
 		v = &reln->md_seg_fds[forknum][curopensegs - 1];
 
@@ -1201,13 +1227,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 			FileClose(v->mdfd_vfd);
 			_fdvec_resize(reln, forknum, curopensegs - 1);
 		}
-		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
+		else if (priorblocks + rel_segment_size > nblocks)
 		{
 			/*
 			 * This is the last segment we want to keep. Truncate the file to
 			 * the right length. NOTE: if nblocks is exactly a multiple K of
-			 * RELSEG_SIZE, we will truncate the K+1st segment to 0 length but
-			 * keep it. This adheres to the invariant given in the header
+			 * rel_segment_size, we will truncate the K+1st segment to 0 length
+			 * but keep it. This adheres to the invariant given in the header
 			 * comments.
 			 */
 			BlockNumber lastsegblocks = nblocks - priorblocks;
@@ -1566,7 +1592,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
 	v->mdfd_vfd = fd;
 	v->mdfd_segno = segno;
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, forknum, v) <= rel_segment_size);
 
 	/* all done */
 	return v;
@@ -1593,7 +1619,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 		   (EXTENSION_FAIL | EXTENSION_CREATE | EXTENSION_RETURN_NULL |
 			EXTENSION_DONT_OPEN));
 
-	targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
+	targetseg = blockno_to_segno(blkno);
 
 	/* if an existing and opened segment, we're done */
 	if (targetseg < reln->md_num_open_segs[forknum])
@@ -1630,7 +1656,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 
 		Assert(nextsegno == v->mdfd_segno + 1);
 
-		if (nblocks > ((BlockNumber) RELSEG_SIZE))
+		if (nblocks > rel_segment_size)
 			elog(FATAL, "segment too big");
 
 		if ((behavior & EXTENSION_CREATE) ||
@@ -1645,31 +1671,31 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			 * ahead and create the segments so we can finish out the replay.
 			 *
 			 * We have to maintain the invariant that segments before the last
-			 * active segment are of size RELSEG_SIZE; therefore, if
+			 * active segment are of size rel_segment_size; therefore, if
 			 * extending, pad them out with zeroes if needed.  (This only
 			 * matters if in recovery, or if the caller is extending the
 			 * relation discontiguously, but that can happen in hash indexes.)
 			 */
-			if (nblocks < ((BlockNumber) RELSEG_SIZE))
+			if (nblocks < rel_segment_size)
 			{
 				char	   *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
 													 MCXT_ALLOC_ZERO);
 
 				mdextend(reln, forknum,
-						 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+						 nextsegno * rel_segment_size - 1,
 						 zerobuf, skipFsync);
 				pfree(zerobuf);
 			}
 			flags = O_CREAT;
 		}
 		else if (!(behavior & EXTENSION_DONT_CHECK_SIZE) &&
-				 nblocks < ((BlockNumber) RELSEG_SIZE))
+				 nblocks < rel_segment_size)
 		{
 			/*
 			 * When not extending (or explicitly including truncated
 			 * segments), only open the next segment if the current one is
-			 * exactly RELSEG_SIZE.  If not (this branch), either return NULL
-			 * or fail.
+			 * exactly rel_segment_size.  If not (this branch), either return
+			 * NULL or fail.
 			 */
 			if (behavior & EXTENSION_RETURN_NULL)
 			{
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index a5b18328b89..82ede1c4f0a 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -57,10 +57,18 @@
 #include "storage/ipc.h"
 #include "storage/md.h"
 #include "storage/smgr.h"
+#include "utils/guc_tables.h"
 #include "utils/hsearch.h"
 #include "utils/inval.h"
 
 
+/*
+ * The number of blocks that should be in a segment file.  Has a wider type
+ * than BlockNumber, so that it can represent the case where the whole
+ * relation fits in one file.
+ */
+int64		rel_segment_size;
+
 /*
  * This struct of function pointers defines the API between smgr.c and
  * any individual storage manager module.  Note that smgr subfunctions are
@@ -820,3 +828,13 @@ ProcessBarrierSmgrRelease(void)
 	smgrreleaseall();
 	return true;
 }
+
+/*
+ * GUC show hook for "segment_size": formats rel_segment_size, which is
+ * counted in blocks, via ShowGUCInt64WithUnits() with GUC_UNIT_BLOCKS.
+ */
+const char *
+show_segment_size(void)
+{
+	return ShowGUCInt64WithUnits(rel_segment_size, GUC_UNIT_BLOCKS);
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index dd5a46469a6..009db70a8b6 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -5393,6 +5393,22 @@ GetConfigOptionByName(const char *name, const char **varname, bool missing_ok)
 	return ShowGUCOption(record, true);
 }
 
+/*
+ * Format an int64 value expressed in a GUC base unit the same way that
+ * ShowGUCOption() would, unit suffix included.  Meant for custom show hooks.
+ */
+char *
+ShowGUCInt64WithUnits(int64 value, int flags)
+{
+	char		formatted[256];
+	const char *suffix;
+	int64		scaled;
+
+	convert_int_from_base_unit(value, flags & GUC_UNIT, &scaled, &suffix);
+	snprintf(formatted, sizeof(formatted), INT64_FORMAT "%s", scaled, suffix);
+	return pstrdup(formatted);
+}
+
 /*
  * ShowGUCOption: get string value of variable
  *
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 45013582a74..45cd53ab79b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -594,10 +594,10 @@ static int	max_function_args;
 static int	max_index_keys;
 static int	max_identifier_length;
 static int	block_size;
-static int	segment_size;
 static int	shared_memory_size_mb;
 static int	shared_memory_size_in_huge_pages;
 static int	wal_block_size;
+static int	phony_segment_size;
 static bool data_checksums;
 static bool integer_datetimes;
 
@@ -3239,15 +3239,19 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	/*
+	 * We use a phony GUC with a custom show function, because integer GUCs
+	 * are not wide enough to hold an int64 value.
+	 */
 	{
 		{"segment_size", PGC_INTERNAL, PRESET_OPTIONS,
 			gettext_noop("Shows the number of pages per disk file."),
 			NULL,
 			GUC_UNIT_BLOCKS | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
 		},
-		&segment_size,
-		RELSEG_SIZE, RELSEG_SIZE, RELSEG_SIZE,
-		NULL, NULL, NULL
+		&phony_segment_size,
+		0, 0, 0,
+		NULL, NULL, show_segment_size
 	},
 
 	{
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 200b2e8e317..0f24e0337a7 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -81,6 +81,7 @@
 #include "getopt_long.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "port/pg_bitutils.h"
 
 
 /* Ideally this would be in a .h file, but it hardly seems worth the trouble */
@@ -165,6 +166,8 @@ static bool show_setting = false;
 static bool data_checksums = false;
 static char *xlog_dir = NULL;
 static int	wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static char *str_rel_segment_size = NULL;
+static int64 rel_segment_size;
 static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
 
 
@@ -1536,12 +1539,12 @@ bootstrap_template1(void)
 
 	printfPQExpBuffer(&cmd, "\"%s\" --boot %s %s", backend_exec, boot_options, extra_options);
 	appendPQExpBuffer(&cmd, " -X %d", wal_segment_size_mb * (1024 * 1024));
+	appendPQExpBuffer(&cmd, " -R " INT64_FORMAT, rel_segment_size);
 	if (data_checksums)
 		appendPQExpBuffer(&cmd, " -k");
 	if (debug)
 		appendPQExpBuffer(&cmd, " -d 5");
 
-
 	PG_CMD_OPEN(cmd.data);
 
 	for (line = bki_lines; *line != NULL; line++)
@@ -2456,6 +2459,7 @@ usage(const char *progname)
 	printf(_("  -W, --pwprompt            prompt for a password for the new superuser\n"));
 	printf(_("  -X, --waldir=WALDIR       location for the write-ahead log directory\n"));
 	printf(_("      --wal-segsize=SIZE    size of WAL segments, in megabytes\n"));
+	printf(_("      --rel-segsize=SIZE    size of relation segments\n"));
 	printf(_("\nLess commonly used options:\n"));
 	printf(_("  -c, --set NAME=VALUE      override default setting for server parameter\n"));
 	printf(_("  -d, --debug               generate lots of debugging output\n"));
@@ -3107,6 +3111,7 @@ main(int argc, char *argv[])
 		{"icu-locale", required_argument, NULL, 16},
 		{"icu-rules", required_argument, NULL, 17},
 		{"sync-method", required_argument, NULL, 18},
+		{"rel-segsize", required_argument, NULL, 19},
 		{NULL, 0, NULL, 0}
 	};
 
@@ -3291,6 +3296,9 @@ main(int argc, char *argv[])
 				if (!parse_sync_method(optarg, &sync_method))
 					exit(1);
 				break;
+			case 19:
+				str_rel_segment_size = pg_strdup(optarg);
+				break;
 			default:
 				/* getopt_long already emitted a complaint */
 				pg_log_error_hint("Try \"%s --help\" for more information.", progname);
@@ -3357,6 +3365,56 @@ main(int argc, char *argv[])
 	if (!IsValidWalSegSize(wal_segment_size_mb * 1024 * 1024))
 		pg_fatal("argument of %s must be a power of two between 1 and 1024", "--wal-segsize");
 
+	/*
+	 * Set the relation segment size, in blocks.  The default is 1GB; an
+	 * explicit --rel-segsize is a positive integer followed by a mandatory
+	 * unit (kB, MB, GB or TB), and must be a power of two and a multiple of
+	 * BLCKSZ.
+	 */
+	if (str_rel_segment_size == NULL)
+	{
+		rel_segment_size = (1024 * 1024 * 1024) / BLCKSZ;
+	}
+	else
+	{
+		int64		bytes;
+		int64		unit;
+		char	   *endptr;
+
+		/* strtoll, not strtol: long is only 32 bits wide on some platforms */
+		bytes = strtoll(str_rel_segment_size, &endptr, 10);
+		if (endptr == str_rel_segment_size)
+			pg_fatal("argument of --rel-segsize must begin with a number");
+		if (bytes <= 0)
+			pg_fatal("argument of --rel-segsize must be greater than zero");
+
+		if (strcmp(endptr, "kB") == 0)
+			unit = INT64CONST(1) << 10;
+		else if (strcmp(endptr, "MB") == 0)
+			unit = INT64CONST(1) << 20;
+		else if (strcmp(endptr, "GB") == 0)
+			unit = INT64CONST(1) << 30;
+		else if (strcmp(endptr, "TB") == 0)
+			unit = INT64CONST(1) << 40;
+		else
+			pg_fatal("argument of --rel-segsize must end with kB, MB, GB or TB");
+
+		/* refuse values whose byte count would overflow int64 */
+		if (bytes > PG_INT64_MAX / unit)
+			pg_fatal("argument of --rel-segsize is out of range");
+		bytes *= unit;
+
+		if (bytes % BLCKSZ != 0)
+			pg_fatal("argument of --rel-segsize must be a multiple of BLCKSZ");
+		if (pg_popcount64(bytes) != 1)
+			pg_fatal("argument of --rel-segsize must be a power of two");
+		/* compare against a 64-bit constant: "1 << 31" overflows int */
+		if (sizeof(off_t) < 8 && bytes > (INT64CONST(1) << 31))
+			pg_fatal("argument of --rel-segsize is too large for this platform's off_t");
+
+		rel_segment_size = bytes / BLCKSZ;
+	}
+
 	get_restricted_token();
 
 	setup_pgdata();
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9e6fd435f60..17767b55606 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -223,7 +223,7 @@ scan_file(const char *fn, int segmentno)
 		if (PageIsNew(buf.data))
 			continue;
 
-		csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
+		csum = pg_checksum_page(buf.data, blockno + segmentno * ControlFile->relseg_size);
 		if (mode == PG_MODE_CHECK)
 		{
 			if (csum != header->pd_checksum)
diff --git a/src/bin/pg_combinebackup/reconstruct.c b/src/bin/pg_combinebackup/reconstruct.c
index 873d3079025..998f593968b 100644
--- a/src/bin/pg_combinebackup/reconstruct.c
+++ b/src/bin/pg_combinebackup/reconstruct.c
@@ -22,6 +22,10 @@
 #include "reconstruct.h"
 #include "storage/block.h"
 
+
+/* XXX this will need to be loaded out of a control file!  Assume 1GB for now. */
+int64		rel_segment_size = (1024 * 1024 * 1024) / BLCKSZ;
+
 /*
  * An rfile stores the data that we need in order to be able to use some file
  * on disk for reconstruction. For any given output file, we create one rfile
@@ -447,16 +451,18 @@ make_incremental_rfile(char *filename)
 
 	/* Read block count. */
 	read_bytes(rf, &rf->num_blocks, sizeof(rf->num_blocks));
-	if (rf->num_blocks > RELSEG_SIZE)
-		pg_fatal("file \"%s\" has block count %u in excess of segment size %u",
-				 filename, rf->num_blocks, RELSEG_SIZE);
+	if (rf->num_blocks > rel_segment_size)
+		pg_fatal("file \"%s\" has block count %u in excess of segment size %lld",
+				 filename, rf->num_blocks,
+				 (long long) rel_segment_size);
 
 	/* Read truncation block length. */
 	read_bytes(rf, &rf->truncation_block_length,
 			   sizeof(rf->truncation_block_length));
-	if (rf->truncation_block_length > RELSEG_SIZE)
-		pg_fatal("file \"%s\" has truncation block length %u in excess of segment size %u",
-				 filename, rf->truncation_block_length, RELSEG_SIZE);
+	if (rf->truncation_block_length > rel_segment_size)
+		pg_fatal("file \"%s\" has truncation block length %u in excess of segment size %lld",
+				 filename, rf->truncation_block_length,
+				 (long long) rel_segment_size);
 
 	/* Read block numbers if there are any. */
 	if (rf->num_blocks > 0)
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 93e0837947c..8687785bad5 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -304,7 +304,7 @@ main(int argc, char *argv[])
 	/* we don't print floatFormat since can't say much useful about it */
 	printf(_("Database block size:                  %u\n"),
 		   ControlFile->blcksz);
-	printf(_("Blocks per segment of large relation: %u\n"),
-		   ControlFile->relseg_size);
+	printf(_("Blocks per segment of large relation: %lld\n"),
+		   (long long) ControlFile->relseg_size);
 	printf(_("WAL block size:                       %u\n"),
 		   ControlFile->xlog_blcksz);
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index e9dcb5a6d89..92d34833ad9 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -690,7 +690,7 @@ GuessControlValues(void)
 	ControlFile.maxAlign = MAXIMUM_ALIGNOF;
 	ControlFile.floatFormat = FLOATFORMAT_VALUE;
 	ControlFile.blcksz = BLCKSZ;
-	ControlFile.relseg_size = RELSEG_SIZE;
+	ControlFile.relseg_size = (1024 * 1024 * 1024) / BLCKSZ;
 	ControlFile.xlog_blcksz = XLOG_BLCKSZ;
 	ControlFile.xlog_seg_size = DEFAULT_XLOG_SEG_SIZE;
 	ControlFile.nameDataLen = NAMEDATALEN;
@@ -758,7 +758,7 @@ PrintControlValues(bool guessed)
 	/* we don't print floatFormat since can't say much useful about it */
 	printf(_("Database block size:                  %u\n"),
 		   ControlFile.blcksz);
-	printf(_("Blocks per segment of large relation: %u\n"),
-		   ControlFile.relseg_size);
+	printf(_("Blocks per segment of large relation: %lld\n"),
+		   (long long) ControlFile.relseg_size);
 	printf(_("WAL block size:                       %u\n"),
 		   ControlFile.xlog_blcksz);
diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c
index 255ddf2ffaf..0c7e4522b6d 100644
--- a/src/bin/pg_rewind/filemap.c
+++ b/src/bin/pg_rewind/filemap.c
@@ -296,8 +296,8 @@ process_target_wal_block_change(ForkNumber forknum, RelFileLocator rlocator,
 	BlockNumber blkno_inseg;
 	int			segno;
 
-	segno = blkno / RELSEG_SIZE;
-	blkno_inseg = blkno % RELSEG_SIZE;
+	segno = blkno / rel_segment_size;
+	blkno_inseg = blkno % rel_segment_size;
 
 	path = datasegpath(rlocator, forknum, segno);
 	entry = lookup_filehash_entry(path);
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index bde90bf60bb..8553c565f76 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -62,6 +62,7 @@ static ControlFileData ControlFile_source_after;
 
 const char *progname;
 int			WalSegSz;
+int64		rel_segment_size;
 
 /* Configuration options */
 char	   *datadir_target = NULL;
@@ -1041,6 +1042,8 @@ digestControlFile(ControlFileData *ControlFile, const char *content,
 		exit(1);
 	}
 
+	rel_segment_size = ControlFile->relseg_size;
+
 	/* Additional checks on control file */
 	checkControlFile(ControlFile);
 }
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ec43cbe2c67..596741b2b8f 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -26,6 +26,7 @@ extern bool dry_run;
 extern bool do_sync;
 extern int	WalSegSz;
 extern DataDirSyncMethod sync_method;
+extern int64 rel_segment_size;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c
index a1fc5fec78d..9cd3b00fe40 100644
--- a/src/bin/pg_upgrade/relfilenumber.c
+++ b/src/bin/pg_upgrade/relfilenumber.c
@@ -183,7 +183,7 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
 
 	/*
 	 * Now copy/link any related segments as well. Remember, PG breaks large
-	 * files into 1GB segments, the first segment has no extension, subsequent
+	 * files into segments, the first segment has no extension, subsequent
 	 * segments are named relfilenumber.1, relfilenumber.2, relfilenumber.3.
 	 */
 	for (segno = 0;; segno++)
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index a00606ffcdf..354b15fbff1 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -204,7 +204,7 @@ typedef struct ControlFileData
 	 * compatible with the backend executable.
 	 */
 	uint32		blcksz;			/* data block size for this DB */
-	uint32		relseg_size;	/* blocks per segment of large relation */
+	int64		relseg_size;	/* blocks per segment of large relation */
 
 	uint32		xlog_blcksz;	/* block size within WAL files */
 	uint32		xlog_seg_size;	/* size of each WAL segment */
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 591e1ca3df6..50426f4c021 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -637,19 +637,6 @@
    your system. */
 #undef PTHREAD_CREATE_JOINABLE
 
-/* RELSEG_SIZE is the maximum number of blocks allowed in one disk file. Thus,
-   the maximum size of a single file is RELSEG_SIZE * BLCKSZ; relations bigger
-   than that are divided into multiple files. RELSEG_SIZE * BLCKSZ must be
-   less than your OS' limit on file size. This is often 2 GB or 4GB in a
-   32-bit operating system, unless you have large file support enabled. By
-   default, we make the limit 1 GB to avoid any possible integer-overflow
-   problems within the OS. A limit smaller than necessary only means we divide
-   a large relation into more chunks than necessary, so it seems best to err
-   in the direction of a small limit. A power-of-2 value is recommended to
-   save a few cycles in md.c, but is not absolutely required. Changing
-   RELSEG_SIZE requires an initdb. */
-#undef RELSEG_SIZE
-
 /* The size of `bool', as computed by sizeof. */
 #undef SIZEOF_BOOL
 
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index fc5f883ce14..4d853b71222 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -18,6 +18,8 @@
 #include "storage/block.h"
 #include "storage/relfilelocator.h"
 
+extern int64 rel_segment_size;
+
 /*
  * smgr.c maintains a table of SMgrRelation objects, which are essentially
  * cached file handles.  An SMgrRelation is created (if not already present)
@@ -109,6 +111,7 @@ extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
 extern void smgrregistersync(SMgrRelation reln, ForkNumber forknum);
 extern void AtEOXact_SMgr(void);
 extern bool ProcessBarrierSmgrRelease(void);
+extern const char *show_segment_size(void);
 
 static inline void
 smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h
index 0a2e274ebb2..2a8399b32c4 100644
--- a/src/include/utils/guc_tables.h
+++ b/src/include/utils/guc_tables.h
@@ -302,6 +302,7 @@ extern struct config_generic **get_explain_guc_options(int *num);
 
 /* get string value of variable */
 extern char *ShowGUCOption(struct config_generic *record, bool use_units);
+extern char *ShowGUCInt64WithUnits(int64 value, int flags);
 
 /* get whether or not the GUC variable is visible to current user */
 extern bool ConfigOptionIsVisible(struct config_generic *conf);
-- 
2.39.2

Reply via email to