On Thu, 2023-03-09 at 19:11 +0000, Jeff Davis wrote: > Use ICU by default at initdb time.
I'm seeing a failure on hoverfly: https://buildfarm.postgresql.org/cgi-bin/show_stage_log.pl?nm=hoverfly&dt=2023-03-09%2021%3A51%3A45&stg=initdb-en_US.8859-15 That's because initdb always defaults to UTF-8 when the locale provider is ICU. ICU works just fine with many other encodings; is there a reason initdb doesn't take the encoding from the environment, just as it does for provider=libc? Of course, we still need to default to UTF-8 when the encoding from the environment isn't supported by ICU. Patch attached; it requires a few test fixups to adapt. -- Jeff Davis PostgreSQL Contributor Team - AWS
From d1e101df09e4485976949dd47c9505dbda1de071 Mon Sep 17 00:00:00 2001 From: Jeff Davis <[email protected]> Date: Thu, 9 Mar 2023 16:00:45 -0800 Subject: [PATCH v1] initdb: obtain encoding from environment by default for ICU. The libc provider already did so, this just makes ICU consistent. Previously, if the provider was ICU, initdb defaulted to UTF-8, which might be in conflict with the locale from the environment. Per buildfarm failure on system "hoverfly" related to commit 27b62377b4. --- contrib/unaccent/meson.build | 2 +- src/bin/initdb/initdb.c | 12 +++++------- src/bin/pg_upgrade/t/002_pg_upgrade.pl | 15 +++++---------- src/bin/scripts/t/020_createdb.pl | 2 +- src/test/icu/t/010_database.pl | 2 +- 5 files changed, 13 insertions(+), 20 deletions(-) diff --git a/contrib/unaccent/meson.build b/contrib/unaccent/meson.build index 613dd0be22..284d34ee29 100644 --- a/contrib/unaccent/meson.build +++ b/contrib/unaccent/meson.build @@ -37,6 +37,6 @@ tests += { 'sql': [ 'unaccent', ], - 'regress_args': ['--encoding=UTF8'], + 'regress_args': ['--encoding=UTF8', '--no-locale'], }, } diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index bf88cd2439..04a6d58377 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2346,18 +2346,16 @@ setup_locale_encoding(void) lc_time); } - if (!encoding && locale_provider == COLLPROVIDER_ICU) - { - encodingid = PG_UTF8; - printf(_("The default database encoding has been set to \"%s\".\n"), - pg_encoding_to_char(encodingid)); - } - else if (!encoding) + if (!encoding) { int ctype_enc; ctype_enc = pg_get_encoding_from_locale(lc_ctype, true); + if (locale_provider == COLLPROVIDER_ICU && + (ctype_enc == -1 || !is_encoding_supported_by_icu(ctype_enc))) + ctype_enc = PG_UTF8; + if (ctype_enc == -1) { /* Couldn't recognize the locale's codeset */ diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 1b5df730e9..90669f3c6d 100644 --- 
a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -108,7 +108,7 @@ if ($oldnode->pg_version >= 11) my $original_encoding = "6"; # UTF-8 my $original_provider = "c"; -my $original_collate = "C"; +my $original_locale = "C"; my $original_iculocale = ""; my $provider_field = "'c' AS datlocprovider"; my $iculocale_field = "NULL AS daticulocale"; @@ -123,7 +123,7 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes') my @initdb_params = @custom_opts; push @initdb_params, ('--encoding', 'UTF-8'); -push @initdb_params, ('--lc-collate', $original_collate); +push @initdb_params, ('--locale', $original_locale); if ($original_provider eq "i") { push @initdb_params, ('--locale-provider', 'icu'); @@ -136,16 +136,12 @@ $oldnode->start; my $result; $result = $oldnode->safe_psql( - 'postgres', "SELECT encoding, $provider_field, datcollate, $iculocale_field + 'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field FROM pg_database WHERE datname='template0'"); -is($result, "$original_encoding|$original_provider|$original_collate|$original_iculocale", +is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale", "check locales in original cluster" ); -# check ctype, which was acquired from environment by initdb -my $original_ctype = $oldnode->safe_psql( - 'postgres', q{SELECT datctype FROM pg_database WHERE datname='template0'}); - # The default location of the source code is the root of this directory. my $srcdir = abs_path("../../.."); @@ -224,7 +220,6 @@ my $newnode = PostgreSQL::Test::Cluster->new('new_node'); # cluster. 
push @initdb_params, ('--encoding', 'SQL_ASCII'); push @initdb_params, ('--locale-provider', 'libc'); -push @initdb_params, ('--lc-ctype', 'C'); $node_params{extra} = \@initdb_params; $newnode->init(%node_params); @@ -401,7 +396,7 @@ if (-d $log_path) $result = $newnode->safe_psql( 'postgres', "SELECT encoding, $provider_field, datcollate, datctype, $iculocale_field FROM pg_database WHERE datname='template0'"); -is($result, "$original_encoding|$original_provider|$original_collate|$original_ctype|$original_iculocale", +is($result, "$original_encoding|$original_provider|$original_locale|$original_locale|$original_iculocale", "check that locales in new cluster match original cluster" ); diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index 8ec58cdd64..af3b1492e3 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -41,7 +41,7 @@ if ($ENV{with_icu} eq 'yes') [ 'createdb', '-T', 'template0', '-E', 'UTF8', '--locale-provider=icu', - '--icu-locale=en', 'foobar5' + '--locale=C', '--icu-locale=en', 'foobar5' ], qr/statement: CREATE DATABASE foobar5 .* LOCALE_PROVIDER icu ICU_LOCALE 'en'/, 'create database with ICU locale specified'); diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl index 45d77c319a..715b1bffd6 100644 --- a/src/test/icu/t/010_database.pl +++ b/src/test/icu/t/010_database.pl @@ -54,7 +54,7 @@ b), # Test error cases in CREATE DATABASE involving locale-related options my ($ret, $stdout, $stderr) = $node1->psql('postgres', - q{CREATE DATABASE dbicu LOCALE_PROVIDER icu TEMPLATE template0 ENCODING UTF8}); + q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8}); isnt($ret, 0, "ICU locale must be specified for ICU provider: exit code not 0"); like( -- 2.34.1
