From 4ca3e79e0712ff0eaf5ce3e2bc36f6fffa45d3f3 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 2 Oct 2024 15:12:27 -0700
Subject: [PATCH v3 4/5] pg_upgrade: Add --set-char-signedness to set the
 default char signedness of new cluster.

This change adds a new option --set-char-signedness to pg_upgrade. It
enables users to explicitly set the default char signedness of the new
cluster during pg_upgrade. This helps users who know they copied the
v17 source cluster from x86 (signedness=true) to ARM (signedness=false):
they can run pg_upgrade properly without first having to acquire an
x86 VM.

Reviewed-by: Noah Misch
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
---
 doc/src/sgml/ref/pgupgrade.sgml             | 48 +++++++++++++++++++++
 src/bin/pg_upgrade/option.c                 | 12 ++++++
 src/bin/pg_upgrade/pg_upgrade.c             | 10 ++++-
 src/bin/pg_upgrade/pg_upgrade.h             |  3 ++
 src/bin/pg_upgrade/t/005_char_signedness.pl | 27 ++++++++++++
 5 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 4777381dac2..262632a9a3e 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -276,6 +276,54 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--set-char-signedness=</option><replaceable>option</replaceable></term>
+      <listitem>
+       <para>
+        Manually set the default char signedness of the new cluster. Possible values
+        are <literal>signed</literal> and <literal>unsigned</literal>.
+       </para>
+       <para>
+        In the C language, the default signedness of the <type>char</type> type
+        (when not explicitly specified) varies across platforms. For example,
+        <type>char</type> defaults to <type>signed char</type> on x86 CPUs but
+        to <type>unsigned char</type> on ARM CPUs. When data stored using the
+        <type>char</type> type is persisted to disk, such as in GIN indexes,
+        this platform-dependent behavior results in incorrect data comparisons
+        in two scenarios:
+       </para>
+       <itemizedlist>
+        <listitem>
+         <para>
+          When data is moved between platforms with different char signedness.
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          When data is replicated using streaming replication across different architectures.
+         </para>
+        </listitem>
+       </itemizedlist>
+       <para>
+        Starting from <productname>PostgreSQL</productname> 18, database clusters
+        maintain their own default char signedness setting, which can be used as
+        a hint to ensure consistent behavior across platforms with different
+        default char signedness. By default, <application>pg_upgrade</application>
+        preserves the char signedness setting when upgrading from an existing cluster.
+        However, when upgrading from <productname>PostgreSQL</productname> 17 or
+        earlier, <application>pg_upgrade</application> adopts the char signedness
+        of the platform on which it was built.
+       </para>
+       <para>
+        This option allows you to explicitly set the default char signedness for
+        the new cluster, overriding any inherited values. This is particularly useful
+        when you plan to migrate the upgraded cluster to a platform with different
+        char signedness (for example, when moving from an x86-based system to an
+        ARM-based system).
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
diff --git a/src/bin/pg_upgrade/option.c b/src/bin/pg_upgrade/option.c
index 108eb7a1ba4..1a580d656bb 100644
--- a/src/bin/pg_upgrade/option.c
+++ b/src/bin/pg_upgrade/option.c
@@ -60,6 +60,7 @@ parseCommandLine(int argc, char *argv[])
 		{"copy", no_argument, NULL, 2},
 		{"copy-file-range", no_argument, NULL, 3},
 		{"sync-method", required_argument, NULL, 4},
+		{"set-char-signedness", required_argument, NULL, 5},
 
 		{NULL, 0, NULL, 0}
 	};
@@ -70,6 +71,7 @@ parseCommandLine(int argc, char *argv[])
 
 	user_opts.do_sync = true;
 	user_opts.transfer_mode = TRANSFER_MODE_COPY;
+	user_opts.char_signedness = -1;
 
 	os_info.progname = get_progname(argv[0]);
 
@@ -212,6 +214,14 @@ parseCommandLine(int argc, char *argv[])
 				user_opts.sync_method = pg_strdup(optarg);
 				break;
 
+			case 5:
+				if (pg_strcasecmp(optarg, "signed") == 0)
+					user_opts.char_signedness = 1;
+				else if (pg_strcasecmp(optarg, "unsigned") == 0)
+					user_opts.char_signedness = 0;
+				else
+					pg_fatal("invalid argument for option %s", "--set-char-signedness");
+				break;
 			default:
 				fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
 						os_info.progname);
@@ -306,6 +316,8 @@ usage(void)
 	printf(_("  --clone                       clone instead of copying files to new cluster\n"));
 	printf(_("  --copy                        copy files to new cluster (default)\n"));
 	printf(_("  --copy-file-range             copy files to new cluster with copy_file_range\n"));
+	printf(_("  --set-char-signedness=OPTION  set new cluster char signedness to \"signed\" or\n"));
+	printf(_("                                \"unsigned\"\n"));
 	printf(_("  --sync-method=METHOD          set method for syncing files to disk\n"));
 	printf(_("  -?, --help                    show this help, then exit\n"));
 	printf(_("\n"
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index cc7357b5599..cd8e72f7bde 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -399,8 +399,14 @@ set_new_cluster_char_signedness(void)
 {
 	bool		new_char_signedness;
 
-	/* Inherit the source database's signedness */
-	new_char_signedness = old_cluster.controldata.default_char_signedness;
+	/*
+	 * Use the char signedness specified by the user, if any. Otherwise,
+	 * inherit the source database's signedness.
+	 */
+	if (user_opts.char_signedness != -1)
+		new_char_signedness = (user_opts.char_signedness == 1);
+	else
+		new_char_signedness = old_cluster.controldata.default_char_signedness;
 
 	/* Change the char signedness of the new cluster, if necessary */
 	if (new_cluster.controldata.default_char_signedness != new_char_signedness)
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index ee65cf795b7..ef86c7f3420 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -334,6 +334,9 @@ typedef struct
 	int			jobs;			/* number of processes/threads to use */
 	char	   *socketdir;		/* directory to use for Unix sockets */
 	char	   *sync_method;
+	int			char_signedness;	/* default char signedness: -1 if not
+									 * specified, 1 for "signed" and 0 for
+									 * "unsigned" */
 } UserOpts;
 
 typedef struct
diff --git a/src/bin/pg_upgrade/t/005_char_signedness.pl b/src/bin/pg_upgrade/t/005_char_signedness.pl
index 780ad71406d..a2236df776e 100644
--- a/src/bin/pg_upgrade/t/005_char_signedness.pl
+++ b/src/bin/pg_upgrade/t/005_char_signedness.pl
@@ -63,4 +63,31 @@ command_like(
 	qr/Default char data signedness:\s+unsigned/,
     'the default char signedness is updated during pg_upgrade');
 
+# Set up a fresh pair of old and new clusters for the option test.
+my $old2 = PostgreSQL::Test::Cluster->new('old2');
+my $new2 = PostgreSQL::Test::Cluster->new('new2');
+$old2->init();
+$new2->init();
+
+# pg_upgrade with --set-char-signedness should succeed on the fresh pair.
+command_ok(
+    [ 'pg_upgrade', '--no-sync',
+      '-d', $old2->data_dir,
+      '-D', $new2->data_dir,
+      '-b', $old2->config_data('--bindir'),
+      '-B', $new2->config_data('--bindir'),
+      '-s', $new2->host,
+      '-p', $old2->port,
+      '-P', $new2->port,
+      '--set-char-signedness', 'unsigned',
+      $mode ],
+    'run of pg_upgrade with --set-char-signedness option');
+
+# Check that --set-char-signedness set the default char signedness of the
+# new cluster created for this test (not the one upgraded earlier).
+command_like(
+	[ 'pg_controldata', $new2->data_dir ],
+	qr/Default char data signedness:\s+unsigned/,
+    '--set-char-signedness sets unsigned');
+
 done_testing();
-- 
2.43.5

