On Thu, 2023-03-09 at 19:11 +0000, Jeff Davis wrote:
> Use ICU by default at initdb time.

I'm seeing a failure on hoverfly:

https://buildfarm.postgresql.org/cgi-bin/show_stage_log.pl?nm=hoverfly&dt=2023-03-09%2021%3A51%3A45&stg=initdb-en_US.8859-15

That's because initdb always defaults to UTF-8 when the provider is
ICU. ICU works just fine with many other encodings; is there a reason
initdb doesn't take the encoding from the environment, just like it
does for provider=libc?

Of course, we still need to default to UTF-8 when the encoding from the
environment isn't supported by ICU.

Patch attached. Requires a few test fixups to adapt.

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From d1e101df09e4485976949dd47c9505dbda1de071 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Thu, 9 Mar 2023 16:00:45 -0800
Subject: [PATCH v1] initdb: obtain encoding from environment by default for
 ICU.

The libc provider already did so; this just makes ICU
consistent. Previously, if the provider was ICU, initdb defaulted to
UTF-8, which might be in conflict with the locale from the
environment.

Per buildfarm failure on system "hoverfly" related to commit
27b62377b4.
---
 contrib/unaccent/meson.build           |  2 +-
 src/bin/initdb/initdb.c                | 12 +++++-------
 src/bin/pg_upgrade/t/002_pg_upgrade.pl | 15 +++++----------
 src/bin/scripts/t/020_createdb.pl      |  2 +-
 src/test/icu/t/010_database.pl         |  2 +-
 5 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/contrib/unaccent/meson.build b/contrib/unaccent/meson.build
index 613dd0be22..284d34ee29 100644
--- a/contrib/unaccent/meson.build
+++ b/contrib/unaccent/meson.build
@@ -37,6 +37,6 @@ tests += {
     'sql': [
       'unaccent',
     ],
-    'regress_args': ['--encoding=UTF8'],
+    'regress_args': ['--encoding=UTF8', '--no-locale'],
   },
 }
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index bf88cd2439..04a6d58377 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2346,18 +2346,16 @@ setup_locale_encoding(void)
 			   lc_time);
 	}
 
-	if (!encoding && locale_provider == COLLPROVIDER_ICU)
-	{
-		encodingid = PG_UTF8;
-		printf(_("The default database encoding has been set to \"%s\".\n"),
-			   pg_encoding_to_char(encodingid));
-	}
-	else if (!encoding)
+	if (!encoding)
 	{
 		int			ctype_enc;
 
 		ctype_enc = pg_get_encoding_from_locale(lc_ctype, true);
 
+		if (locale_provider == COLLPROVIDER_ICU &&
+			(ctype_enc == -1 || !is_encoding_supported_by_icu(ctype_enc)))
+			ctype_enc = PG_UTF8;
+
 		if (ctype_enc == -1)
 		{
 			/* Couldn't recognize the locale's codeset */
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 1b5df730e9..90669f3c6d 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -108,7 +108,7 @@ if ($oldnode->pg_version >= 11)
 
 my $original_encoding = "6"; # UTF-8
 my $original_provider = "c";
-my $original_collate = "C";
+my $original_locale = "C";
 my $original_iculocale = "";
 my $provider_field = "'c' AS datlocprovider";
 my $iculocale_field = "NULL AS daticulocale";
@@ -123,7 +123,7 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
 my @initdb_params = @custom_opts;
 
 push @initdb_params, ('--encoding', 'UTF-8');
-push @initdb_params, ('--lc-collate', $original_collate);
+push @initdb_params, ('--locale', $original_locale);
 if ($original_provider eq "i")
 {
 	push @initdb_params, ('--locale-provider', 'icu');
@@ -136,16 +136,12 @@ $oldnode->start;
 
 my $result;
 $result = $oldnode->safe_psql(
-	'postgres', "SELECT encoding, $provider_field, datcollate, $iculocale_field
+	'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
                  FROM pg_database WHERE datname='template0'");
-is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale",
+is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
 		"check locales in original cluster"
 	);
 
-# check ctype, which was acquired from environment by initdb
-my $original_ctype = $oldnode->safe_psql(
-	'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'});
-
 # The default location of the source code is the root of this directory.
 my $srcdir = abs_path("../../..");
 
@@ -224,7 +220,6 @@ my $newnode = PostgreSQL::Test::Cluster->new('new_node');
 # cluster.
 push @initdb_params, ('--encoding', 'SQL_ASCII');
 push @initdb_params, ('--locale-provider', 'libc');
-push @initdb_params, ('--lc-ctype', 'C');
 
 $node_params{extra} = \@initdb_params;
 $newnode->init(%node_params);
@@ -401,7 +396,7 @@ if (-d $log_path)
 $result = $newnode->safe_psql(
 	'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field
                  FROM pg_database WHERE datname='template0'");
-is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale",
+is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale",
 		"check that locales in new cluster match original cluster"
 	);
 
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 8ec58cdd64..af3b1492e3 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -41,7 +41,7 @@ if ($ENV{with_icu} eq 'yes')
 		[
 			'createdb',        '-T',
 			'template0',       '-E', 'UTF8', '--locale-provider=icu',
-			'--icu-locale=en', 'foobar5'
+			'--locale=C',      '--icu-locale=en', 'foobar5'
 		],
 		qr/statement: CREATE DATABASE foobar5 .* LOCALE_PROVIDER icu ICU_LOCALE 'en'/,
 		'create database with ICU locale specified');
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 45d77c319a..715b1bffd6 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -54,7 +54,7 @@ b),
 # Test error cases in CREATE DATABASE involving locale-related options
 
 my ($ret, $stdout, $stderr) = $node1->psql('postgres',
-	q{CREATE DATABASE dbicu LOCALE_PROVIDER icu TEMPLATE template0 ENCODING UTF8});
+	q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8});
 isnt($ret, 0,
 	"ICU locale must be specified for ICU provider: exit code not 0");
 like(
-- 
2.34.1

Reply via email to