Hello!

I discovered an interesting behaviour during installcheck runs when the cluster was initialized with the ICU locale provider:

$ initdb --locale-provider icu --icu-locale en-US -D data &&
pg_ctl -D data -l logfile start

1) The ECPG tests fail because they use the SQL_ASCII encoding [1], while the template0 database uses the ICU locale provider, and ICU does not support SQL_ASCII:

$ make -C src/interfaces/ecpg/ installcheck
...
============== creating database "ecpg1_regression"   ==============
ERROR:  encoding "SQL_ASCII" is not supported with ICU provider
ERROR:  database "ecpg1_regression" does not exist
command failed: "/home/marina/postgresql/master/my/inst/bin/psql" -X -c "CREATE DATABASE \"ecpg1_regression\" TEMPLATE=template0 ENCODING='SQL_ASCII'" -c "ALTER DATABASE \"ecpg1_regression\" SET lc_messages TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_monetary TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_numeric TO 'C';ALTER DATABASE \"ecpg1_regression\" SET lc_time TO 'C';ALTER DATABASE \"ecpg1_regression\" SET bytea_output TO 'hex';ALTER DATABASE \"ecpg1_regression\" SET timezone_abbreviations TO 'Default';" "postgres"

2) The option --no-locale in pg_regress is described as "use C locale" [2]. But in this case the created databases actually use the ICU locale provider with the ICU cluster locale from template0 (see diff_check_backend_used_provider.patch):

$ make NO_LOCALE=1 installcheck

In regression.diffs:

diff -U3 /home/marina/postgresql/master/src/test/regress/expected/test_setup.out /home/marina/postgresql/master/src/test/regress/results/test_setup.out
--- /home/marina/postgresql/master/src/test/regress/expected/test_setup.out 2022-09-27 05:31:27.674628815 +0300
+++ /home/marina/postgresql/master/src/test/regress/results/test_setup.out 2022-10-21 15:09:31.232992885 +0300
@@ -143,6 +143,798 @@
 \set filename :abs_srcdir '/data/person.data'
 COPY person FROM :'filename';
 VACUUM ANALYZE person;
+NOTICE:  varstrfastcmp_locale sss->collate_c 0 sss->locale 0xefacd0
+NOTICE:  varstrfastcmp_locale sss->locale->provider i
+NOTICE:  varstrfastcmp_locale sss->locale->info.icu.locale en-US
...

The patch diff_fix_pg_regress_create_database.patch fixes both issues for me.

[1] https://github.com/postgres/postgres/blob/ce20f8b9f4354b46b40fd6ebf7ce5c37d08747e0/src/interfaces/ecpg/test/Makefile#L18
[2] https://github.com/postgres/postgres/blob/ce20f8b9f4354b46b40fd6ebf7ce5c37d08747e0/src/test/regress/pg_regress.c#L1992

--
Marina Polyakova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index b57ed946c42bb54ede800e95045aa937a8dbad85..b3c0f6f753f8428274389844ccf9778a7ed47ea4 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -281,6 +281,14 @@ hashtext(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashtext lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashtext mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashtext mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) VARDATA_ANY(key),
@@ -337,6 +345,14 @@ hashtextextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashtextextended lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashtextextended mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashtextextended mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 02d462a659778016f3c4479d425ba0a84feb6e26..9627c84a7ccfb4c4013556a51c989e9e6d611634 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -243,6 +243,8 @@ pg_set_regex_collation(Oid collation)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "pg_set_regex_collation lc_ctype_is_c(collid) %d", lc_ctype_is_c(collation));
+
 	if (lc_ctype_is_c(collation))
 	{
 		/* C/POSIX collations use this path regardless of database encoding */
@@ -259,6 +261,14 @@ pg_set_regex_collation(Oid collation)
 		 */
 		pg_regex_locale = pg_newlocale_from_collation(collation);
 
+		elog(NOTICE, "pg_set_regex_collation pg_regex_locale %p", pg_regex_locale);
+		if (pg_regex_locale)
+		{
+			elog(NOTICE, "pg_set_regex_collation pg_regex_locale->provider %c", pg_regex_locale->provider);
+			if (pg_regex_locale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "pg_set_regex_collation pg_regex_locale->info.icu.locale %s", pg_regex_locale->info.icu.locale ? pg_regex_locale->info.icu.locale : "(null)");
+		}
+
 		if (pg_regex_locale && !pg_regex_locale->deterministic)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 26f498b5df4d8eb280a0e6af69fd92d4ce0d89b7..a0616a0457c9abe4635064964146027008271ff8 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1682,6 +1682,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_tolower lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1693,6 +1695,14 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_tolower mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_tolower mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_tolower mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1804,6 +1814,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_toupper lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1815,6 +1827,14 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_toupper mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_toupper mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_toupper mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
@@ -1927,6 +1947,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 	}
 
+	elog(NOTICE, "str_initcap lc_ctype_is_c(collid) %d", lc_ctype_is_c(collid));
+
 	/* C/POSIX collations use this path regardless of database encoding */
 	if (lc_ctype_is_c(collid))
 	{
@@ -1938,6 +1960,14 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "str_initcap mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "str_initcap mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "str_initcap mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 #ifdef USE_ICU
 		if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
 		{
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 68e2e6f7a719866eacc506e1780f6d1b58951599..dacf044a4ee05555b4742300d0c078b3880db60c 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1010,6 +1010,14 @@ hashbpchar(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashbpchar lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashbpchar mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashbpchar mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any((unsigned char *) keydata, keylen);
@@ -1070,6 +1078,14 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
+	elog(NOTICE, "hashbpcharextended lc_collate_is_c(collid) %d mylocale %p", lc_collate_is_c(collid), mylocale);
+	if (mylocale)
+	{
+		elog(NOTICE, "hashbpcharextended mylocale->provider %c", mylocale->provider);
+		if (mylocale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "hashbpcharextended mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+	}
+
 	if (!mylocale || mylocale->deterministic)
 	{
 		result = hash_any_extended((unsigned char *) keydata, keylen,
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c5e7ee7ca2d3073c067928cfa35c1e746218bb64..012565dd2986c7ae9781f61dbb5f724770575fab 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1521,6 +1521,8 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 
 	check_collation_set(collid);
 
+	elog(NOTICE, "varstr_cmp lc_collate_is_c(collid) %d", lc_collate_is_c(collid));
+
 	/*
 	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
 	 * have to do some memory copying.  This turns out to be significantly
@@ -1543,6 +1545,14 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 
 		mylocale = pg_newlocale_from_collation(collid);
 
+		elog(NOTICE, "varstr_cmp mylocale %p", mylocale);
+		if (mylocale)
+		{
+			elog(NOTICE, "varstr_cmp mylocale->provider %c", mylocale->provider);
+			if (mylocale->provider == COLLPROVIDER_ICU)
+				elog(NOTICE, "varstr_cmp mylocale->info.icu.locale %s", mylocale->info.icu.locale ? mylocale->info.icu.locale : "(null)");
+		}
+
 		/*
 		 * memcmp() can't tell us which of two unequal strings sorts first,
 		 * but it's a cheap way to tell if they're equal.  Testing shows that
@@ -2377,6 +2387,14 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 		return sss->last_returned;
 	}
 
+	elog(NOTICE, "varstrfastcmp_locale sss->collate_c %d sss->locale %p", sss->collate_c, sss->locale);
+	if (sss->locale)
+	{
+		elog(NOTICE, "varstrfastcmp_locale sss->locale->provider %c", sss->locale->provider);
+		if (sss->locale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "varstrfastcmp_locale sss->locale->info.icu.locale %s", sss->locale->info.icu.locale ? sss->locale->info.icu.locale : "(null)");
+	}
+
 	if (sss->locale)
 	{
 		if (sss->locale->provider == COLLPROVIDER_ICU)
@@ -2472,6 +2490,14 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 	if (sss->typid == BPCHAROID)
 		len = bpchartruelen(authoritative_data, len);
 
+	elog(NOTICE, "varstr_abbrev_convert sss->collate_c %d sss->locale %p", sss->collate_c, sss->locale);
+	if (sss->locale)
+	{
+		elog(NOTICE, "varstr_abbrev_convert sss->locale->provider %c", sss->locale->provider);
+		if (sss->locale->provider == COLLPROVIDER_ICU)
+			elog(NOTICE, "varstr_abbrev_convert sss->locale->info.icu.locale %s", sss->locale->info.icu.locale ? sss->locale->info.icu.locale : "(null)");
+	}
+
 	/*
 	 * If we're using the C collation, use memcpy(), rather than strxfrm(), to
 	 * abbreviate keys.  The full comparator for the C locale is always
diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c
index dda076847a38a74a765ca1283226815e9d10ada1..4aa42c6b1c7f2f2c36af539bafca4d64cdb7db3a 100644
--- a/src/test/regress/pg_regress.c
+++ b/src/test/regress/pg_regress.c
@@ -1899,14 +1899,25 @@ create_database(const char *dbname)
 	/*
 	 * We use template0 so that any installation-local cruft in template1 will
 	 * not mess up the tests.
+	 *
+	 * Explicitly set the locale provider libc for the option --no-locale.
+	 * Otherwise during installcheck the new database may use the ICU locale
+	 * provider with the custom ICU locale from template0.
 	 */
 	header(_("creating database \"%s\""), dbname);
 	if (encoding)
+
+		/*
+		 * Explicitly set the locale provider libc for the manually chosen
+		 * encoding. Otherwise during installcheck the new database may use
+		 * the ICU locale provider (from template0) which does not support all
+		 * encodings.
+		 */
 		psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0 ENCODING='%s'%s", dbname, encoding,
-						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C'" : "");
+						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C' LOCALE_PROVIDER='libc'" : " LOCALE_PROVIDER='libc'");
 	else
 		psql_add_command(buf, "CREATE DATABASE \"%s\" TEMPLATE=template0%s", dbname,
-						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C'" : "");
+						 (nolocale) ? " LC_COLLATE='C' LC_CTYPE='C' LOCALE_PROVIDER='libc'" : "");
 	psql_add_command(buf,
 					 "ALTER DATABASE \"%s\" SET lc_messages TO 'C';"
 					 "ALTER DATABASE \"%s\" SET lc_monetary TO 'C';"

Reply via email to