From 312c9437fd272faff8385617cb057a7f5bf85b6e Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 25 Sep 2024 11:08:57 -0700
Subject: [PATCH v1 1/2] Add default_char_signedness to ControlFileData

The signedness of the 'char' type in C is
implementation-dependent. For instance, 'signed char' is used by
default on x86 CPUs, while 'unsigned char' is used on aarch CPUs. This
can lead to inconsistent results when comparing char data across
different platforms.

This commit introduces a new 'default_char_signedness' field in
ControlFileData to store the signedness of the 'char' type on the
platform where the database cluster was initialized.

While this change does not encourage the use of 'char' without
explicitly specifying its signedness, it provides a hint to ensure
consistent behavior for indexes (e.g., GIN and GiST) and tables that
already store data sorted by the 'char' type on disk, especially in
cross-platform replication scenarios.

XXX bump catversion.

Reviewed-by:
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
---
 doc/src/sgml/func.sgml                  |  5 +++++
 src/backend/access/transam/xlog.c       | 23 +++++++++++++++++++++++
 src/backend/utils/misc/pg_controldata.c |  7 +++++--
 src/bin/pg_controldata/pg_controldata.c |  2 ++
 src/include/access/xlog.h               |  1 +
 src/include/catalog/pg_control.h        |  6 ++++++
 src/include/catalog/pg_proc.dat         |  6 +++---
 7 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 461fc3f437..e2e9d97e77 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -27831,6 +27831,11 @@ acl      | {postgres=arwdDxtm/postgres,foo=r/postgres}
        <entry><type>integer</type></entry>
       </row>
 
+      <row>
+       <entry><structfield>signed_char</structfield></entry>
+       <entry><type>bool</type></entry>
+      </row>
+
      </tbody>
     </tgroup>
    </table>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e96aa4d1cb..58cec9848d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -42,6 +42,7 @@
 #include <math.h>
 #include <time.h>
 #include <fcntl.h>
+#include <limits.h>	/* for CHAR_MIN */
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <unistd.h>
@@ -4256,6 +4257,18 @@ WriteControlFile(void)
 
 	ControlFile->float8ByVal = FLOAT8PASSBYVAL;
 
+	/*
+	 * The signedness of the char type is implementation-defined. For example
+	 * on x86 architecture CPUs, the char data type is typically treated as
+	 * signed by default whereas on aarch architecture CPUs it is typically
+	 * treated as unsigned by default.
+	 */
+#if CHAR_MIN != 0
+	ControlFile->default_char_signedness = true;
+#else
+	ControlFile->default_char_signedness = false;
+#endif
+
 	/* Contents are protected with a CRC */
 	INIT_CRC32C(ControlFile->crc);
 	COMP_CRC32C(ControlFile->crc,
@@ -4584,6 +4597,16 @@ DataChecksumsEnabled(void)
 	return (ControlFile->data_checksum_version > 0);
 }
 
+/*
+ * Return true if the cluster was initialized on a platform where
+ * the default signedness of char is "signed".
+ */
+bool
+GetDefaultCharSignedness(void)
+{
+	return ControlFile->default_char_signedness;
+}
+
 /*
  * Returns a fake LSN for unlogged relations.
  *
diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c
index 98c932dc7b..7c750bf0b9 100644
--- a/src/backend/utils/misc/pg_controldata.c
+++ b/src/backend/utils/misc/pg_controldata.c
@@ -203,8 +203,8 @@ pg_control_recovery(PG_FUNCTION_ARGS)
 Datum
 pg_control_init(PG_FUNCTION_ARGS)
 {
-	Datum		values[11];
-	bool		nulls[11];
+	Datum		values[12];
+	bool		nulls[12];
 	TupleDesc	tupdesc;
 	HeapTuple	htup;
 	ControlFileData *ControlFile;
@@ -254,6 +254,9 @@ pg_control_init(PG_FUNCTION_ARGS)
 	values[10] = Int32GetDatum(ControlFile->data_checksum_version);
 	nulls[10] = false;
 
+	values[11] = BoolGetDatum(ControlFile->default_char_signedness);
+	nulls[11] = false;
+
 	htup = heap_form_tuple(tupdesc, values, nulls);
 
 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 93a05d80ca..718e7940fd 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -323,6 +323,8 @@ main(int argc, char *argv[])
 		   _("64-bit integers"));
 	printf(_("Float8 argument passing:              %s\n"),
 		   (ControlFile->float8ByVal ? _("by value") : _("by reference")));
+	printf(_("Default char data signedness:         %s\n"),
+		   (ControlFile->default_char_signedness ? _("signed") : _("unsigned")));
 	printf(_("Data page checksum version:           %u\n"),
 		   ControlFile->data_checksum_version);
 	printf(_("Mock authentication nonce:            %s\n"),
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 083810f5b4..ef64f67424 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -230,6 +230,7 @@ extern XLogRecPtr GetXLogWriteRecPtr(void);
 extern uint64 GetSystemIdentifier(void);
 extern char *GetMockAuthenticationNonce(void);
 extern bool DataChecksumsEnabled(void);
+extern bool GetDefaultCharSignedness(void);
 extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
 extern Size XLOGShmemSize(void);
 extern void XLOGShmemInit(void);
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index e80ff8e414..3663790589 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -221,6 +221,12 @@ typedef struct ControlFileData
 	/* Are data pages protected by checksums? Zero if no checksum version */
 	uint32		data_checksum_version;
 
+	/*
+	 * True if the default signedness of char is "signed" on a platform where
+	 * the cluster is initialized.
+	 */
+	bool		default_char_signedness;
+
 	/*
 	 * Random nonce, used in authentication requests that need to proceed
 	 * based on values that are cluster-unique, like a SASL exchange that
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index ff5436acac..acdf63aa62 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12060,9 +12060,9 @@
   descr => 'pg_controldata init state information as a function',
   proname => 'pg_control_init', provolatile => 'v', prorettype => 'record',
   proargtypes => '',
-  proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}',
+  proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4,bool}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version,default_char_signedness}',
   prosrc => 'pg_control_init' },
 
 # subscripting support for built-in types
-- 
2.39.3

