From d98b3d8145fed54312158432ad1a2e953d4c7736 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 2 Oct 2024 15:11:23 -0700
Subject: [PATCH v3 3/5] pg_upgrade: Preserve default char signedness value
 from old cluster.

Commit XXX introduced the 'default_char_signedness' field in
controlfile. Newly created database clusters always set this field to
'signed'.

This change ensures that pg_upgrade updates the
'default_char_signedness' to 'unsigned' if the source database cluster
has signedness=false. For source clusters from v17 or earlier, which
lack the 'default_char_signedness' information, pg_upgrade assumes the
source cluster was initialized on the same platform where pg_upgrade
is running. It then sets the 'default_char_signedness' value according
to the current platform's default character signedness.

Reviewed-by: Noah Misch
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
---
 src/bin/pg_upgrade/controldata.c            | 44 +++++++++++++-
 src/bin/pg_upgrade/pg_upgrade.c             | 28 +++++++++
 src/bin/pg_upgrade/pg_upgrade.h             |  7 +++
 src/bin/pg_upgrade/t/005_char_signedness.pl | 66 +++++++++++++++++++++
 4 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 src/bin/pg_upgrade/t/005_char_signedness.pl

diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c
index ab16716f319..0cfc5c2233d 100644
--- a/src/bin/pg_upgrade/controldata.c
+++ b/src/bin/pg_upgrade/controldata.c
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include <ctype.h>
+#include <limits.h>				/* for CHAR_MIN */
 
 #include "common/string.h"
 #include "pg_upgrade.h"
@@ -62,6 +63,7 @@ get_control_data(ClusterInfo *cluster)
 	bool		got_date_is_int = false;
 	bool		got_data_checksum_version = false;
 	bool		got_cluster_state = false;
+	bool		got_default_char_signedness = false;
 	char	   *lc_collate = NULL;
 	char	   *lc_ctype = NULL;
 	char	   *lc_monetary = NULL;
@@ -501,6 +503,25 @@ get_control_data(ClusterInfo *cluster)
 			cluster->controldata.data_checksum_version = str2uint(p);
 			got_data_checksum_version = true;
 		}
+		else if ((p = strstr(bufin, "Default char data signedness:")) != NULL)
+		{
+			p = strchr(p, ':');
+
+			if (p == NULL || strlen(p) <= 1)
+				pg_fatal("%d: controldata retrieval problem", __LINE__);
+
+			/* Skip the colon and any whitespace after it */
+			p++;
+			while (isspace((unsigned char) *p))
+				p++;
+
+			/* The value should be either 'signed' or 'unsigned' */
+			if (strcmp(p, "signed") != 0 && strcmp(p, "unsigned") != 0)
+				pg_fatal("%d: controldata retrieval problem", __LINE__);
+
+			cluster->controldata.default_char_signedness = strcmp(p, "signed") == 0;
+			got_default_char_signedness = true;
+		}
 	}
 
 	rc = pclose(output);
@@ -561,6 +582,21 @@ get_control_data(ClusterInfo *cluster)
 		}
 	}
 
+	/*
+	 * Pre-v18 database clusters don't have the default char signedness
+	 * information. We use the char signedness of the platform where
+	 * pg_upgrade was built.
+	 */
+	if (cluster->controldata.cat_ver < DEFAULT_CHAR_SIGNEDNESS_VAT_VER)
+	{
+		Assert(!got_default_char_signedness);
+#if CHAR_MIN != 0
+		cluster->controldata.default_char_signedness = true;
+#else
+		cluster->controldata.default_char_signedness = false;
+#endif
+	}
+
 	/* verify that we got all the mandatory pg_control data */
 	if (!got_xid || !got_oid ||
 		!got_multi || !got_oldestxid ||
@@ -572,7 +608,9 @@ get_control_data(ClusterInfo *cluster)
 		!got_index || !got_toast ||
 		(!got_large_object &&
 		 cluster->controldata.ctrl_ver >= LARGE_OBJECT_SIZE_PG_CONTROL_VER) ||
-		!got_date_is_int || !got_data_checksum_version)
+		!got_date_is_int || !got_data_checksum_version ||
+		(!got_default_char_signedness &&
+		 cluster->controldata.cat_ver >= DEFAULT_CHAR_SIGNEDNESS_VAT_VER))
 	{
 		if (cluster == &old_cluster)
 			pg_log(PG_REPORT,
@@ -641,6 +679,10 @@ get_control_data(ClusterInfo *cluster)
 		if (!got_data_checksum_version)
 			pg_log(PG_REPORT, "  data checksum version");
 
+		/* value added in Postgres 18 */
+		if (!got_default_char_signedness)
+			pg_log(PG_REPORT, "  default char signedness");
+
 		pg_fatal("Cannot continue without required control information, terminating");
 	}
 }
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 36c7f3879d5..cc7357b5599 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -54,6 +54,7 @@
  */
 #define RESTORE_TRANSACTION_SIZE 1000
 
+static void set_new_cluster_char_signedness(void);
 static void set_locale_and_encoding(void);
 static void prepare_new_cluster(void);
 static void prepare_new_globals(void);
@@ -154,6 +155,7 @@ main(int argc, char **argv)
 	 */
 
 	copy_xact_xlog_xid();
+	set_new_cluster_char_signedness();
 
 	/* New now using xids of the old system */
 
@@ -388,6 +390,32 @@ setup(char *argv0)
 	}
 }
 
+/*
+ * Set the new cluster's default char signedness using the old cluster's
+ * value.
+ */
+static void
+set_new_cluster_char_signedness(void)
+{
+	bool		new_char_signedness;
+
+	/* Inherit the source database's signedness */
+	new_char_signedness = old_cluster.controldata.default_char_signedness;
+
+	/* Change the char signedness of the new cluster, if necessary */
+	if (new_cluster.controldata.default_char_signedness != new_char_signedness)
+	{
+		prep_status("Setting the default char signedness for new cluster");
+
+		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
+				  "\"%s/pg_resetwal\" --char-signedness %s \"%s\"",
+				  new_cluster.bindir,
+				  new_char_signedness ? "signed" : "unsigned",
+				  new_cluster.pgdata);
+
+		check_ok();
+	}
+}
 
 /*
  * Copy locale and encoding information into the new cluster's template0.
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 0cdd675e4f1..ee65cf795b7 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -125,6 +125,12 @@ extern char *output_files[];
  */
 #define JSONB_FORMAT_CHANGE_CAT_VER 201409291
 
+/*
+ * The control file was changed to have the default char signedness,
+ * commit XXXXX.
+ */
+#define DEFAULT_CHAR_SIGNEDNESS_VAT_VER 202501161
+
 
 /*
  * Each relation is represented by a relinfo structure.
@@ -245,6 +251,7 @@ typedef struct
 	bool		date_is_int;
 	bool		float8_pass_by_value;
 	uint32		data_checksum_version;
+	bool		default_char_signedness;
 } ControlData;
 
 /*
diff --git a/src/bin/pg_upgrade/t/005_char_signedness.pl b/src/bin/pg_upgrade/t/005_char_signedness.pl
new file mode 100644
index 00000000000..780ad71406d
--- /dev/null
+++ b/src/bin/pg_upgrade/t/005_char_signedness.pl
@@ -0,0 +1,66 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Tests for handling the default char signedness during upgrade.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Can be changed to test the other modes
+my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';
+
+# Initialize old and new clusters
+my $old = PostgreSQL::Test::Cluster->new('old');
+my $new = PostgreSQL::Test::Cluster->new('new');
+$old->init();
+$new->init();
+
+# Check the default char signedness of both the old and the new clusters.
+# Newly created clusters unconditionally use 'signed'.
+command_like(
+	[ 'pg_controldata', $old->data_dir ],
+	qr/Default char data signedness:\s+signed/,
+	'default char signedness of old cluster is signed in control file');
+command_like(
+	[ 'pg_controldata', $new->data_dir ],
+	qr/Default char data signedness:\s+signed/,
+	'default char signedness is new cluster signed in control file');
+
+# Set the old cluster's default char signedness to unsigned for test.
+command_ok(
+    [
+     'pg_resetwal', '--char-signedness', 'unsigned',
+     '-f', $old->data_dir
+    ],
+    "set old cluster's default char signeness to unsigned");
+
+# Check if the value is successfully updated.
+command_like(
+	[ 'pg_controldata', $old->data_dir ],
+	qr/Default char data signedness:\s+unsigned/,
+    'updated default char signedness is unsigned in control file');
+
+# pg_upgrade should be successful.
+command_ok(
+    [ 'pg_upgrade', '--no-sync',
+      '-d', $old->data_dir,
+      '-D', $new->data_dir,
+      '-b', $old->config_data('--bindir'),
+      '-B', $new->config_data('--bindir'),
+      '-s', $new->host,
+      '-p', $old->port,
+      '-P', $new->port,
+      $mode ],
+    'run of pg_upgrade');
+
+# Check if the default char signedness of the new cluster inherited
+# the old cluster's value.
+command_like(
+	[ 'pg_controldata', $new->data_dir ],
+	qr/Default char data signedness:\s+unsigned/,
+    'the default char signedness is updated during pg_upgrade');
+
+done_testing();
-- 
2.43.5

