Hello!

This is a copy of [1], moved to a separate thread for the Commitfest.

I discovered some interesting behaviour during installcheck runs on PG 15+ when the cluster was initialized with the ICU locale provider:

$ initdb --locale-provider icu --icu-locale en-US -D data &&
pg_ctl -D data -l logfile start

1) The ECPG tests fail because they use the SQL_ASCII encoding [2], while the template0 database uses the ICU locale provider, and SQL_ASCII is not supported by ICU:

$ make -C src/interfaces/ecpg/ installcheck
...
============== creating database "ecpg1_regression"   ==============
ERROR:  encoding "SQL_ASCII" is not supported with ICU provider
ERROR:  database "ecpg1_regression" does not exist
command failed: "/home/marina/postgresql/master/my/inst/bin/psql" -X -c "CREATE DATABASE \"ecpg1_regression\" TEMPLATE=template0 ENCODING='SQL_ASCII'" -c "ALTER DATABASE \"ecpg1_regression\" SET lc_messages TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_monetary TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_numeric TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_time TO 'C';ALTER DATABASE \"ecpg1_regression\" SET bytea_output TO 'hex';ALTER DATABASE \"ecpg1_regression\" SET timezone_abbreviations TO 'Default';" "postgres"

2) The option --no-locale in pg_regress is described as "use C locale" [3]. But in this case the created databases actually use the ICU locale provider with the ICU cluster locale from template0 (see diff_check_backend_used_provider.txt):

$ make NO_LOCALE=1 installcheck

In regression.diffs:

diff -U3 /home/marina/postgresql/master/src/test/regress/expected/test_setup.out /home/marina/postgresql/master/src/test/regress/results/test_setup.out
--- /home/marina/postgresql/master/src/test/regress/expected/test_setup.out 2022-09-27 05:31:27.674628815 +0300
+++ /home/marina/postgresql/master/src/test/regress/results/test_setup.out 2022-10-21 15:09:31.232992885 +0300
@@ -143,6 +143,798 @@
 \set filename :abs_srcdir '/data/person.data'
 COPY person FROM :'filename';
 VACUUM ANALYZE person;
+NOTICE:  varstrfastcmp_locale sss->collate_c 0 sss->locale 0xefacd0
+NOTICE:  varstrfastcmp_locale sss->locale->provider i
+NOTICE:  varstrfastcmp_locale sss->locale->info.icu.locale en-US
...

The patch v1-0001-Fix-database-creation-during-installchecks-for-IC.patch fixes both issues for me.

[1] https://www.postgresql.org/message-id/727b5d5160f845dcf5e0818e625a6e56%40postgrespro.ru
[2] https://github.com/postgres/postgres/blob/ce20f8b9f4354b46b40fd6ebf7ce5c37d08747e0/src/interfaces/ecpg/test/Makefile#L18
[3] https://github.com/postgres/postgres/blob/ce20f8b9f4354b46b40fd6ebf7ce5c37d08747e0/src/test/regress/pg_regress.c#L1992

--
Marina Polyakova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index b57ed946c42bb54ede800e95045aa937a8dbad85..b3c0f6f753f8428274389844ccf9778a7ed47ea4 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -281,6 +281,14 @@ hashtext(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashtext lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashtext mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashtext mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) VARDATA_ANY(key),
@@ -337,6 +345,14 @@ hashtextextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashtextextended lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashtextextended mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashtextextended mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 02d462a659778016f3c4479d425ba0a84feb6e26..9627c84a7ccfb4c4013556a51c989e9e6d611634 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -243,6 +243,8 @@ pg_set_regex_collation(Oid collation)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "pg_set_regex_collation lc_ctype_is_c(collid) %d", lc_ctype_is_c(collation));
+
 	if (lc_ctype_is_c(collation))
 	{
 		/* C/POSIX collations use this path regardless of database encoding */
@@ -259,6 +261,14 @@ pg_set_regex_collation(Oid collation)
 		 */
 		pg_regex_locale = pg_newlocale_from_collation(collation);
 
+		elog(NOTICE, "pg_set_regex_collation pg_regex_locale %p", pg_regex_locale);
+		if (pg_regex_locale)
+		{
+			elog(NOTICE, "pg_set_regex_collation pg_regex_locale->provider %c", pg_regex_locale->provider);
+			if (pg_regex_locale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "pg_set_regex_collation pg_regex_locale->info.icu.locale %s", pg_regex_locale->info.icu.locale ? pg_regex_locale->info.icu.locale : "(null)");
+		}
+
 		if (pg_regex_locale && !pg_regex_locale->deterministic)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 26f498b5df4d8eb280a0e6af69fd92d4ce0d89b7..a0616a0457c9abe4635064964146027008271ff8 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1682,6 +1682,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_tolower lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1693,6 +1695,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_tolower mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_tolower mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_tolower mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1804,6 +1814,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_toupper lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1815,6 +1827,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_toupper mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_toupper mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_toupper mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1927,6 +1947,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_initcap lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1938,6 +1960,14 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_initcap mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_initcap mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_initcap mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 68e2e6f7a719866eacc506e1780f6d1b58951599..dacf044a4ee05555b4742300d0c078b3880db60c 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1010,6 +1010,14 @@ hashbpchar(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashbpchar lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashbpchar mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashbpchar mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) keydata, keylen);
@@ -1070,6 +1078,14 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashbpcharextended lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashbpcharextended mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashbpcharextended mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) keydata, keylen,
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c5e7ee7ca2d3073c067928cfa35c1e746218bb64..012565dd2986c7ae9781f61dbb5f724770575fab 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1521,6 +1521,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 
 	check_collation_set(collid);
 
+	elog(NOTICE, "varstr_cmp lc_collate_is_c(collid) %d", lc_collate_is_c(collid));
+
 	/*
 	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
 	 * have to do some memory copying.  This turns out to be significantly
@@ -1543,6 +1545,14 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "varstr_cmp mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "varstr_cmp mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "varstr_cmp mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 		/*
 		 * memcmp() can't tell us which of two unequal strings sorts first,
 		 * but it's a cheap way to tell if they're equal.  Testing shows that
@@ -2377,6 +2387,14 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 		return sss->last_returned;
 	}
 
+	elog(NOTICE, "varstrfastcmp_locale sss->collate_c %d sss->locale %p", sss->collate_c, sss->locale);
+	if (sss->locale)
+	{
+		elog(NOTICE, "varstrfastcmp_locale sss->locale->provider %c", sss->locale->provider);
+		if (sss->locale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "varstrfastcmp_locale sss->locale->info.icu.locale %s", sss->locale->info.icu.locale ? sss->locale->info.icu.locale : "(null)");
+	}
+
 	if (sss->locale)
 	{
 		if (sss->locale->provider == COLLPROVIDER_ICU)
@@ -2472,6 +2490,14 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 	if (sss->typid == BPCHAROID)
 		len = bpchartruelen(authoritative_data, len);
 
+	elog(NOTICE, "varstr_abbrev_convert sss->collate_c %d sss->locale %p", sss->collate_c, sss->locale);
+	if (sss->locale)
+	{
+		elog(NOTICE, "varstr_abbrev_convert sss->locale->provider %c", sss->locale->provider);
+		if (sss->locale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "varstr_abbrev_convert sss->locale->info.icu.locale %s", sss->locale->info.icu.locale ? sss->locale->info.icu.locale : "(null)");
+	}
+
 	/*
 	 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
 	 * abbreviate keys.  The full comparator for the C locale is always
From b52431f6494b61ac2c825fcaeb79be2b93c5ddfd Mon Sep 17 00:00:00 2001
From: Marina Polyakova <m.polyak...@postgrespro.ru>
Date: Sat, 29 Oct 2022 12:50:24 +0300
Subject: [PATCH v1] Fix database creation during installchecks for ICU cluster

1) Explicitly request the libc locale provider when the --no-locale
   option is used. Otherwise, during installcheck, the new database may
   use the ICU locale provider with the custom ICU locale inherited from
   template0.

2) Explicitly request the libc locale provider when an encoding is
   chosen manually. Otherwise, during installcheck, the new database may
   use the ICU locale provider (inherited from template0), which does not
   support all encodings. This is important for the ECPG tests, which
   require the SQL_ASCII encoding.
---
 src/test/regress/pg_regress.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index dda076847a..4aa42c6b1c 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -1899,14 +1899,25 @@ create_database(const char *dbname)
 	/*
 	 * We use template0 so that any installation-local cruft in template1 will
 	 * not mess up the tests.
+	 *
+	 * Explicitly set the locale provider libc for the option --no-locale.
+	 * Otherwise during installcheck the new database may use the ICU locale
+	 * provider with the custom ICU locale from template0.
 	 */
 	header(_("creating database \"%s\""), dbname);
 	if (encoding)
+
+		/*
+		 * Explicitly set the locale provider libc for the manually chosen
+		 * encoding. Otherwise during installcheck the new database may use
+		 * the ICU locale provider (from template0) which does not support all
+		 * encodings.
+		 */
 		psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0 ENCODING='%s'%s", dbname, encoding,
-						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C'" : "");
+						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C' LOCALE_PROVIDER='libc'" : " LOCALE_PROVIDER='libc'");
 	else
 		psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0%s", dbname,
-						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C'" : "");
+						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C' LOCALE_PROVIDER='libc'" : "");
 	psql_add_command(buf,
 					 "ALTER DATABASE \"%s\" SET lc_messages TO 'C';"
 					 "ALTER DATABASE \"%s\" SET lc_monetary TO 'C';"
-- 
2.25.1

Reply via email to