On Tue, 2023-03-14 at 23:47 -0700, Jeff Davis wrote:
> On Tue, 2023-03-14 at 10:10 -0700, Jeff Davis wrote:
> > One loose end is that we really should support language tags like
> > "und"
> > in those older versions (54 and earlier). Your commit d72900bded
> > avoided the problem, but perhaps we should fix it by looking for
> > "und"
> > and replacing it with "root" while opening, or something.
> 
> Attached are a few patches to implement this idea.

Here is an updated patch series that includes these earlier fixes for
older ICU versions, with the canonicalization patch last (0005).

I left out the validation patch for now, and I'm evaluating a different
approach that will attempt to match to the locales retrieved with
uloc_countAvailable()/uloc_getAvailable().


-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From 70d98770ce6c1795ab172adf10bda87dafa310e3 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Tue, 14 Mar 2023 09:58:29 -0700
Subject: [PATCH v5 1/5] Support language tags in older ICU versions (53 and
 earlier).

By calling uloc_canonicalize() before parsing the attributes, the
existing locale attribute parsing logic works on language tags as
well.

Fix a small memory leak, too.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.ca...@j-davis.com
---
 src/backend/commands/collationcmds.c          |  8 +++---
 src/backend/utils/adt/pg_locale.c             | 26 ++++++++++++++++---
 .../regress/expected/collate.icu.utf8.out     |  8 ++++++
 src/test/regress/sql/collate.icu.utf8.sql     |  4 +++
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8949684afe..b8f2e7059f 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			const char *name;
 			char	   *langtag;
 			char	   *icucomment;
-			const char *iculocstr;
 			Oid			collid;
 
 			if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 				name = uloc_getAvailable(i);
 
 			langtag = get_icu_language_tag(name);
-			iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
 
 			/*
 			 * Be paranoid about not allowing any non-ASCII strings into
 			 * pg_collation
 			 */
-			if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
+			if (!pg_is_ascii(langtag) || !pg_is_ascii(langtag))
 				continue;
 
 			collid = CollationCreate(psprintf("%s-x-icu", langtag),
 									 nspid, GetUserId(),
 									 COLLPROVIDER_ICU, true, -1,
-									 NULL, NULL, iculocstr, NULL,
-									 get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
+									 NULL, NULL, langtag, NULL,
+									 get_collation_actual_version(COLLPROVIDER_ICU, langtag),
 									 true, true);
 			if (OidIsValid(collid))
 			{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 1d3d4d86d3..b9c7fbd511 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2643,9 +2643,28 @@ pg_attribute_unused()
 static void
 icu_set_collation_attributes(UCollator *collator, const char *loc)
 {
-	char	   *str = asc_tolower(loc, strlen(loc));
+	UErrorCode	status;
+	int32_t		len;
+	char	   *icu_locale_id;
+	char	   *lower_str;
+	char	   *str;
 
-	str = strchr(str, '@');
+	/* first, make sure the string is an ICU format locale ID */
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, NULL, 0, &status);
+	icu_locale_id = palloc(len + 1);
+	status = U_ZERO_ERROR;
+	len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("canonicalization failed for locale string \"%s\": %s",
+						loc, u_errorName(status))));
+
+	lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
+
+	pfree(icu_locale_id);
+
+	str = strchr(lower_str, '@');
 	if (!str)
 		return;
 	str++;
@@ -2660,7 +2679,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 			char	   *value;
 			UColAttribute uattr;
 			UColAttributeValue uvalue;
-			UErrorCode	status;
 
 			status = U_ZERO_ERROR;
 
@@ -2727,6 +2745,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 								loc, u_errorName(status))));
 		}
 	}
+
+	pfree(lower_str);
 }
 
 #endif							/* USE_ICU */
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 9a3e12e42d..6225b575ce 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+ ?column? 
+----------
+ t
+(1 row)
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 0790068f31..64cbfd0a5b 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- test language tags
+CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
-- 
2.34.1

From baf0aa5ff7db793424a73958e38d2ceb7d9877e4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Tue, 14 Mar 2023 21:21:17 -0700
Subject: [PATCH v5 2/5] Wrap ICU ucol_open().

Hide details of supporting older ICU versions in a wrapper
function. The current code only needs to handle
icu_set_collation_attributes(), but a subsequent commit will add
additional version-specific code.
---
 src/backend/utils/adt/pg_locale.c | 54 ++++++++++++++++---------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index b9c7fbd511..28d1715ece 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -143,8 +143,10 @@ static size_t uchar_length(UConverter *converter,
 static int32_t uchar_convert(UConverter *converter,
 							 UChar *dest, int32_t destlen,
 							 const char *str, int32_t srclen);
+#if U_ICU_VERSION_MAJOR_NUM < 54
 static void icu_set_collation_attributes(UCollator *collator, const char *loc);
 #endif
+#endif
 
 /*
  * pg_perm_setlocale
@@ -1420,24 +1422,35 @@ lc_ctype_is_c(Oid collation)
 
 struct pg_locale_struct default_locale;
 
-void
-make_icu_collator(const char *iculocstr,
-				  const char *icurules,
-				  struct pg_locale_struct *resultp)
+static UCollator *
+pg_ucol_open(const char *locale_str)
 {
-#ifdef USE_ICU
 	UCollator  *collator;
 	UErrorCode	status;
 
 	status = U_ZERO_ERROR;
-	collator = ucol_open(iculocstr, &status);
+	collator = ucol_open(locale_str, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						iculocstr, u_errorName(status))));
+						locale_str, u_errorName(status))));
+
+#if U_ICU_VERSION_MAJOR_NUM < 54
+	icu_set_collation_attributes(collator, locale_str);
+#endif
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
-		icu_set_collation_attributes(collator, iculocstr);
+	return collator;
+}
+
+void
+make_icu_collator(const char *iculocstr,
+				  const char *icurules,
+				  struct pg_locale_struct *resultp)
+{
+#ifdef USE_ICU
+	UCollator  *collator;
+
+	collator = pg_ucol_open(iculocstr);
 
 	/*
 	 * If rules are specified, we extract the rules of the standard collation,
@@ -1448,6 +1461,7 @@ make_icu_collator(const char *iculocstr,
 		const UChar *default_rules;
 		UChar	   *agg_rules;
 		UChar	   *my_rules;
+		UErrorCode	status;
 		int32_t		length;
 
 		default_rules = ucol_getRules(collator, &length);
@@ -1719,16 +1733,11 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 	if (collprovider == COLLPROVIDER_ICU)
 	{
 		UCollator  *collator;
-		UErrorCode	status;
 		UVersionInfo versioninfo;
 		char		buf[U_MAX_VERSION_STRING_LENGTH];
 
-		status = U_ZERO_ERROR;
-		collator = ucol_open(collcollate, &status);
-		if (U_FAILURE(status))
-			ereport(ERROR,
-					(errmsg("could not open collator for locale \"%s\": %s",
-							collcollate, u_errorName(status))));
+		collator = pg_ucol_open(collcollate);
+
 		ucol_getVersion(collator, versioninfo);
 		ucol_close(collator);
 
@@ -2639,6 +2648,7 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
  * ucol_open(), so this is only necessary for emulating this behavior on older
  * versions.
  */
+#if U_ICU_VERSION_MAJOR_NUM < 54
 pg_attribute_unused()
 static void
 icu_set_collation_attributes(UCollator *collator, const char *loc)
@@ -2748,6 +2758,7 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 
 	pfree(lower_str);
 }
+#endif
 
 #endif							/* USE_ICU */
 
@@ -2759,17 +2770,8 @@ check_icu_locale(const char *icu_locale)
 {
 #ifdef USE_ICU
 	UCollator  *collator;
-	UErrorCode	status;
-
-	status = U_ZERO_ERROR;
-	collator = ucol_open(icu_locale, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("could not open collator for locale \"%s\": %s",
-						icu_locale, u_errorName(status))));
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
-		icu_set_collation_attributes(collator, icu_locale);
+	collator = pg_ucol_open(icu_locale);
 	ucol_close(collator);
 #else
 	ereport(ERROR,
-- 
2.34.1

From 2b6e8998d016f5aa06d9d043fb538892162e966d Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Tue, 14 Mar 2023 22:28:21 -0700
Subject: [PATCH v5 3/5] Handle the "und" locale in ICU versions 54 and older.

The "und" locale is an alternative spelling of the root locale, but it
was not recognized until ICU 55. To maintain common behavior across
all supported ICU versions, check for "und" and replace with "root"
before opening.

Previously, the lack of support for "und" was dangerous, because
versions 54 and older fall back to the environment when a locale is
not found. If the user specified "und" for the language (which is
expected and documented), it could not only resolve to the wrong
collator, but it could unexpectedly change (which could lead to
corrupt indexes).

This effectively reverts commit d72900bded, which worked around the
problem for the built-in "unicode" collation, and is no longer
necessary.

Discussion: https://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.ca...@j-davis.com
Discussion: https://postgr.es/m/0c6fa66f2753217d2a40480a96bd2ccf023536a1.ca...@j-davis.com
---
 src/backend/utils/adt/pg_locale.c             | 30 +++++++++++++++++++
 src/bin/initdb/initdb.c                       |  2 +-
 src/include/catalog/catversion.h              |  2 +-
 .../regress/expected/collate.icu.utf8.out     |  7 +++++
 src/test/regress/sql/collate.icu.utf8.sql     |  2 ++
 5 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 28d1715ece..a8cc8ffcb1 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1422,12 +1422,35 @@ lc_ctype_is_c(Oid collation)
 
 struct pg_locale_struct default_locale;
 
+#ifdef USE_ICU
+
 static UCollator *
 pg_ucol_open(const char *locale_str)
 {
 	UCollator  *collator;
 	UErrorCode	status;
 
+	/*
+	 * In ICU versions 55 and earlier, "und" is not a recognized spelling of
+	 * the root locale. If the first component of the locale is "und", replace
+	 * with "root" before opening.
+	 */
+#if U_ICU_VERSION_MAJOR_NUM < 55
+	char *fixed_str = NULL;
+
+	if (strncasecmp(locale_str, "und", strlen("und")) == 0 &&
+		!isalnum(locale_str[strlen("und")]))
+	{
+		const char *remainder = locale_str + strlen("und");
+
+		fixed_str = palloc(strlen("root") + strlen(remainder) + 1);
+		strcpy(fixed_str, "root");
+		strcat(fixed_str, remainder);
+
+		locale_str = fixed_str;
+	}
+#endif
+
 	status = U_ZERO_ERROR;
 	collator = ucol_open(locale_str, &status);
 	if (U_FAILURE(status))
@@ -1439,9 +1462,16 @@ pg_ucol_open(const char *locale_str)
 	icu_set_collation_attributes(collator, locale_str);
 #endif
 
+#if U_ICU_VERSION_MAJOR_NUM < 55
+	if (fixed_str != NULL)
+		pfree(fixed_str);
+#endif
+
 	return collator;
 }
 
+#endif
+
 void
 make_icu_collator(const char *iculocstr,
 				  const char *icurules,
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 68d430ed63..d48b7b6060 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -1498,7 +1498,7 @@ setup_collation(FILE *cmdfd)
 	 * that they win if libc defines a locale with the same name.
 	 */
 	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, colliculocale)"
-				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, '');\n\n",
+				  "VALUES (pg_nextoid('pg_catalog.pg_collation', 'oid', 'pg_catalog.pg_collation_oid_index'), 'unicode', 'pg_catalog'::regnamespace, %u, '%c', true, -1, 'und');\n\n",
 				  BOOTSTRAP_SUPERUSERID, COLLPROVIDER_ICU);
 
 	PG_CMD_PRINTF("INSERT INTO pg_collation (oid, collname, collnamespace, collowner, collprovider, collisdeterministic, collencoding, collcollate, collctype)"
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 309aed3703..b2eed22d46 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202303141
+#define CATALOG_VERSION_NO	202303151
 
 #endif
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 6225b575ce..f135200c99 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1312,6 +1312,13 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
  t
 (1 row)
 
+CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
+SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
+ ?column? 
+----------
+ t
+(1 row)
+
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
 CREATE TABLE test3cs (x text COLLATE case_sensitive);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 64cbfd0a5b..8105ebc8ae 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -521,6 +521,8 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
+CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
+SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
 
 CREATE TABLE test1cs (x text COLLATE case_sensitive);
 CREATE TABLE test2cs (x text COLLATE case_sensitive);
-- 
2.34.1

From aa3d9164177742e8a8c1ad25a3613d3ade1aa7b1 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 15 Mar 2023 11:27:12 -0700
Subject: [PATCH v5 4/5] Add SQL function pg_icu_language_tag().

Account for locales "C" and "POSIX", which correspond to the
language tag "en-US-u-va-posix".

Also, don't rely on a fixed-size buffer for language tags, as there is
no defined upper limit (cf. RFC5646 section 4.4).
---
 doc/src/sgml/func.sgml                        | 15 +++++
 src/backend/commands/collationcmds.c          | 44 +++++++-------
 src/backend/utils/adt/pg_locale.c             | 59 +++++++++++++++++++
 src/bin/pg_dump/t/002_pg_dump.pl              |  4 +-
 src/include/catalog/catversion.h              |  2 +-
 src/include/catalog/pg_proc.dat               |  5 ++
 src/include/utils/pg_locale.h                 |  1 +
 .../regress/expected/collate.icu.utf8.out     | 55 +++++++++++++++++
 src/test/regress/sql/collate.icu.utf8.sql     | 10 ++++
 9 files changed, 171 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 15314aa3ee..dd073b7e26 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -27453,6 +27453,21 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
         Use of this function is restricted to superusers.
        </para></entry>
       </row>
+
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>pg_icu_language_tag</primary>
+        </indexterm>
+        <function>pg_icu_language_tag</function> ( <parameter>locale</parameter> <type>text</type> )
+        <returnvalue>text</returnvalue>
+       </para>
+       <para>
+        Canonicalizes the given <parameter>locale</parameter> string into a
+        BCP 47 language tag (see <xref
+        linkend="collation-managing-create-icu"/>).
+       </para></entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index b8f2e7059f..c6dea866da 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -576,26 +576,6 @@ cmpaliases(const void *a, const void *b)
 
 
 #ifdef USE_ICU
-/*
- * Get the ICU language tag for a locale name.
- * The result is a palloc'd string.
- */
-static char *
-get_icu_language_tag(const char *localename)
-{
-	char		buf[ULOC_FULLNAME_CAPACITY];
-	UErrorCode	status;
-
-	status = U_ZERO_ERROR;
-	uloc_toLanguageTag(localename, buf, sizeof(buf), true, &status);
-	if (U_FAILURE(status))
-		ereport(ERROR,
-				(errmsg("could not convert locale name \"%s\" to language tag: %s",
-						localename, u_errorName(status))));
-
-	return pstrdup(buf);
-}
-
 /*
  * Get a comment (specifically, the display name) for an ICU locale.
  * The result is a palloc'd string, or NULL if we can't get a comment
@@ -957,7 +937,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 			else
 				name = uloc_getAvailable(i);
 
-			langtag = get_icu_language_tag(name);
+			langtag = icu_language_tag(name, false);
 
 			/*
 			 * Be paranoid about not allowing any non-ASCII strings into
@@ -1014,3 +994,25 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
 
 	PG_RETURN_INT32(ncreated);
 }
+
+/*
+ * pg_icu_language_tag
+ *
+ * Return the BCP47 language tag representation of the given locale string.
+ */
+Datum
+pg_icu_language_tag(PG_FUNCTION_ARGS)
+{
+#ifdef USE_ICU
+	text	*locale_text = PG_GETARG_TEXT_PP(0);
+	char	*locale_cstr = text_to_cstring(locale_text);
+	char	*langtag	 = icu_language_tag(locale_cstr, false);
+
+	PG_RETURN_TEXT_P(cstring_to_text(langtag));
+#else
+	ereport(ERROR,
+			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+			 errmsg("ICU is not supported in this build")));
+	PG_RETURN_NULL();
+#endif
+}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a8cc8ffcb1..0ac7ce9a80 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2810,6 +2810,65 @@ check_icu_locale(const char *icu_locale)
 #endif
 }
 
+#ifdef USE_ICU
+/*
+ * Return the BCP47 language tag representation of the requested locale.
+ *
+ * This function should be called before passing the string to ucol_open(),
+ * because conversion to a language tag also performs "level 2
+ * canonicalization". In addition to producing a consistent format, level 2
+ * canonicalization is able to more accurately interpret different input
+ * locale string formats, such as POSIX and .NET IDs.
+ */
+char *
+icu_language_tag(const char *loc_str, bool noError)
+{
+	UErrorCode	 status;
+	char		*langtag;
+	size_t		 buflen = 32;	/* arbitrary starting buffer size */
+	const bool	 strict = true;
+
+	/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
+	if ((pg_strncasecmp(loc_str, "c", 1) == 0 && !isalnum(loc_str[1])) ||
+		(pg_strncasecmp(loc_str, "posix", 5) == 0 && !isalnum(loc_str[5])))
+		return pstrdup("en-US-u-va-posix");
+
+	/*
+	 * A BCP47 language tag doesn't have a clearly-defined upper limit
+	 * (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
+	 * uloc_toLanguageTag() doesn't always return the ultimate length on the
+	 * first call, necessitating a loop.
+	 */
+	langtag = palloc(buflen);
+	while (true)
+	{
+		int32_t		len;
+
+		status = U_ZERO_ERROR;
+		len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
+		if (len < buflen || buflen >= MaxAllocSize)
+			break;
+
+		buflen = Min(buflen * 2, MaxAllocSize);
+		langtag = repalloc(langtag, buflen);
+	}
+
+	if (U_FAILURE(status))
+	{
+		pfree(langtag);
+		if (noError)
+			return NULL;
+
+		ereport(ERROR,
+				(errmsg("could not convert locale name \"%s\" to language tag: %s",
+						loc_str, u_errorName(status))));
+	}
+
+	return langtag;
+}
+
+#endif							/* USE_ICU */
+
 /*
  * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
  * Therefore we keep them here rather than with the mbutils code.
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index a22f27f300..0b38c0537b 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1837,9 +1837,9 @@ my %tests = (
 
 	'CREATE COLLATION icu_collation' => {
 		create_order => 76,
-		create_sql   => "CREATE COLLATION icu_collation (PROVIDER = icu, LOCALE = 'C');",
+		create_sql   => "CREATE COLLATION icu_collation (PROVIDER = icu, LOCALE = 'en-US-u-va-posix');",
 		regexp =>
-		  qr/CREATE COLLATION public.icu_collation \(provider = icu, locale = 'C'(, version = '[^']*')?\);/m,
+		  qr/CREATE COLLATION public.icu_collation \(provider = icu, locale = 'en-US-u-va-posix'(, version = '[^']*')?\);/m,
 		icu => 1,
 		like      => { %full_runs, section_pre_data => 1, },
 	},
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index b2eed22d46..8b205ab161 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202303151
+#define CATALOG_VERSION_NO	202303161
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index fbc4aade49..063f77f70c 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11817,6 +11817,11 @@
   proname => 'pg_database_collation_actual_version', procost => '100',
   provolatile => 'v', prorettype => 'text', proargtypes => 'oid',
   prosrc => 'pg_database_collation_actual_version' },
+{ oid => '6273',
+  descr => 'get BCP47 language tag representation of locale',
+  proname => 'pg_icu_language_tag', procost => '100',
+  provolatile => 's', prorettype => 'text', proargtypes => 'text',
+  prosrc => 'pg_icu_language_tag' },
 
 # system management/monitoring related functions
 { oid => '3353', descr => 'list files in the log directory',
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f9ce428233..b78fe0d72c 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -119,6 +119,7 @@ extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 #ifdef USE_ICU
 extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
 extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
+extern char *icu_language_tag(const char *locale_str, bool noError);
 #endif
 extern void check_icu_locale(const char *icu_locale);
 
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index f135200c99..ae608c29be 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -11,6 +11,61 @@ SELECT getdatabaseencoding() <> 'UTF8' OR
 SET client_encoding TO UTF8;
 CREATE SCHEMA collate_tests;
 SET search_path = collate_tests;
+-- test language tag canonicalization
+SELECT pg_icu_language_tag('en_US');
+ pg_icu_language_tag 
+---------------------
+ en-US
+(1 row)
+
+SELECT pg_icu_language_tag('nonsense');
+ pg_icu_language_tag 
+---------------------
+ nonsense
+(1 row)
+
+SELECT pg_icu_language_tag('C.UTF-8');
+ pg_icu_language_tag 
+---------------------
+ en-US-u-va-posix
+(1 row)
+
+SELECT pg_icu_language_tag('POSIX');
+ pg_icu_language_tag 
+---------------------
+ en-US-u-va-posix
+(1 row)
+
+SELECT pg_icu_language_tag('en_US_POSIX');
+ pg_icu_language_tag 
+---------------------
+ en-US-u-va-posix
+(1 row)
+
+SELECT pg_icu_language_tag('@colStrength=secondary');
+ pg_icu_language_tag 
+---------------------
+ und-u-ks-level2
+(1 row)
+
+SELECT pg_icu_language_tag('');
+ pg_icu_language_tag 
+---------------------
+ und
+(1 row)
+
+SELECT pg_icu_language_tag('fr_CA.UTF-8');
+ pg_icu_language_tag 
+---------------------
+ fr-CA
+(1 row)
+
+SELECT pg_icu_language_tag('en_US@colStrength=primary');
+ pg_icu_language_tag 
+---------------------
+ en-US-u-ks-level1
+(1 row)
+
 CREATE TABLE collate_test1 (
     a int,
     b text COLLATE "en-x-icu" NOT NULL
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 8105ebc8ae..1cd823ef37 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -15,6 +15,16 @@ SET client_encoding TO UTF8;
 CREATE SCHEMA collate_tests;
 SET search_path = collate_tests;
 
+-- test language tag canonicalization
+SELECT pg_icu_language_tag('en_US');
+SELECT pg_icu_language_tag('nonsense');
+SELECT pg_icu_language_tag('C.UTF-8');
+SELECT pg_icu_language_tag('POSIX');
+SELECT pg_icu_language_tag('en_US_POSIX');
+SELECT pg_icu_language_tag('@colStrength=secondary');
+SELECT pg_icu_language_tag('');
+SELECT pg_icu_language_tag('fr_CA.UTF-8');
+SELECT pg_icu_language_tag('en_US@colStrength=primary');
 
 CREATE TABLE collate_test1 (
     a int,
-- 
2.34.1

From 7abe977948fb4a501a889b14838a71ebbd4f946a Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 15 Mar 2023 12:37:06 -0700
Subject: [PATCH v5 5/5] Canonicalize ICU locale names to language tags.

Convert to BCP47 language tags before storing in the catalog, except
during binary upgrade or when the locale comes from an existing
collation or template database.

Canonicalization is important, because it's able to handle more kinds
of locale strings than ucol_open(). Without canonicalizing first, a
locale string like "fr_CA.UTF-8" will be misinterpreted by
ucol_open().

The resulting language tags can vary slightly between ICU
versions. For instance, "@colBackwards=yes" is converted to
"und-u-kb-true" in older versions of ICU, and to the simpler (but
equivalent) "und-u-kb" in newer versions.

Discussion: https://postgr.es/m/8c7af6820aed94dc7bc259d2aa7f9663518e6137.ca...@j-davis.com
---
 src/backend/commands/collationcmds.c          | 38 ++++++++++++++
 src/backend/commands/dbcommands.c             | 32 ++++++++++++
 src/backend/utils/adt/pg_locale.c             |  9 ----
 src/bin/initdb/initdb.c                       | 49 +++++++++++++++++++
 .../regress/expected/collate.icu.utf8.out     | 25 +++++++++-
 src/test/regress/sql/collate.icu.utf8.sql     | 13 +++++
 6 files changed, 156 insertions(+), 10 deletions(-)

diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index c6dea866da..bc0e48d507 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -165,6 +165,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 		else
 			colliculocale = NULL;
 
+		/*
+		 * When the ICU locale comes from an existing collation, do not
+		 * canonicalize to a language tag.
+		 */
+
 		datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
 		if (!isnull)
 			collicurules = TextDatumGetCString(datum);
@@ -254,10 +259,43 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
 		}
 		else if (collprovider == COLLPROVIDER_ICU)
 		{
+#ifdef USE_ICU
+			char	*langtag;
+
 			if (!colliculocale)
 				ereport(ERROR,
 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 						 errmsg("parameter \"locale\" must be specified")));
+
+			check_icu_locale(colliculocale);
+
+			/*
+			 * During binary upgrade, preserve the locale string. Otherwise,
+			 * canonicalize to a language tag.
+			 */
+			if (!IsBinaryUpgrade)
+			{
+				langtag = icu_language_tag(colliculocale, true);
+				if (langtag)
+				{
+					ereport(NOTICE,
+							(errmsg("using language tag \"%s\" for locale \"%s\"",
+									langtag, colliculocale)));
+
+					colliculocale = langtag;
+				}
+				else
+				{
+					ereport(WARNING,
+							(errmsg("could not convert locale \"%s\" to language tag",
+									colliculocale)));
+				}
+			}
+#else
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("ICU is not supported in this build")));
+#endif
 		}
 
 		/*
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 4d5d5d6866..5935477f44 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1043,6 +1043,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	if (dblocprovider == COLLPROVIDER_ICU)
 	{
+#ifdef USE_ICU
+		char	*langtag;
+
 		if (!(is_encoding_supported_by_icu(encoding)))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -1059,6 +1062,35 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 					 errmsg("ICU locale must be specified")));
 
 		check_icu_locale(dbiculocale);
+
+		/*
+		 * During binary upgrade, or when the locale came from the template
+		 * database, preserve locale string. Otherwise, canonicalize to a
+		 * language tag.
+		 */
+		if (!IsBinaryUpgrade && dbiculocale != src_iculocale)
+		{
+			langtag = icu_language_tag(dbiculocale, true);
+			if (langtag)
+			{
+				ereport(NOTICE,
+						(errmsg("using language tag \"%s\" for locale \"%s\"",
+								langtag, dbiculocale)));
+
+				dbiculocale = langtag;
+			}
+			else
+			{
+				ereport(WARNING,
+						(errmsg("could not convert locale \"%s\" to language tag",
+								dbiculocale)));
+			}
+		}
+#else
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("ICU is not supported in this build")));
+#endif
 	}
 	else
 	{
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 0ac7ce9a80..dd2857f1ff 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2790,27 +2790,18 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
 }
 #endif
 
-#endif							/* USE_ICU */
-
 /*
  * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
  */
 void
 check_icu_locale(const char *icu_locale)
 {
-#ifdef USE_ICU
 	UCollator  *collator;
 
 	collator = pg_ucol_open(icu_locale);
 	ucol_close(collator);
-#else
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("ICU is not supported in this build")));
-#endif
 }
 
-#ifdef USE_ICU
 /*
  * Return the BCP47 language tag representation of the requested locale.
  *
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index d48b7b6060..3ce7b5991c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -2039,6 +2039,50 @@ check_icu_locale_encoding(int user_enc)
 	return true;
 }
 
+#ifdef USE_ICU
+/*
+ * Convert to canonical BCP47 language tag. Must be consistent with
+ * icu_language_tag().
+ */
+static char *
+icu_language_tag(const char *loc_str)
+{
+	UErrorCode	 status;
+	char		*langtag;
+	size_t		 buflen = 32;	/* arbitrary starting buffer size */
+	const bool	 strict = true;
+
+	/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
+	if ((pg_strncasecmp(loc_str, "c", 1) == 0 && !isalnum(loc_str[1])) ||
+		(pg_strncasecmp(loc_str, "posix", 5) == 0 && !isalnum(loc_str[5])))
+		return pstrdup("en-US-u-va-posix");
+
+	langtag = pg_malloc(buflen);
+	while (true)
+	{
+		int32_t		len;
+
+		status = U_ZERO_ERROR;
+		len = uloc_toLanguageTag(loc_str, langtag, buflen, strict, &status);
+		if (len < buflen)
+			break;
+
+		buflen = buflen * 2;
+		langtag = pg_realloc(langtag, buflen);
+	}
+
+	if (U_FAILURE(status))
+	{
+		pg_free(langtag);
+
+		pg_fatal("could not convert locale name \"%s\" to language tag: %s",
+				 loc_str, u_errorName(status));
+	}
+
+	return langtag;
+}
+#endif
+
 /*
  * Check that ICU accepts the locale name; or if not specified, retrieve the
  * default ICU locale.
@@ -2049,6 +2093,7 @@ check_icu_locale(void)
 #ifdef USE_ICU
 	UCollator	*collator;
 	UErrorCode   status;
+	char		*langtag;
 
 	status = U_ZERO_ERROR;
 	collator = ucol_open(icu_locale, &status);
@@ -2080,6 +2125,10 @@ check_icu_locale(void)
 	}
 
 	ucol_close(collator);
+
+	langtag = icu_language_tag(icu_locale);
+	pg_free(icu_locale);
+	icu_locale = langtag;
 #endif
 }
 
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index ae608c29be..9715c12c93 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1074,6 +1074,7 @@ reset enable_seqscan;
 CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;
 -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
 do $$
 BEGIN
   EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1088,9 +1089,11 @@ BEGIN
           quote_literal(current_setting('lc_collate')) || ');';
 END
 $$;
+RESET client_min_messages;
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
 ERROR:  parameter "locale" must be specified
 CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
+NOTICE:  using language tag "nonsense" for locale "nonsense"
 CREATE COLLATION test4 FROM nonsense;
 ERROR:  collation "nonsense" for encoding "UTF8" does not exist
 CREATE COLLATION test5 FROM test0;
@@ -1217,14 +1220,18 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
 
 -- test ICU collation customization
 -- test the attributes handled by icu_set_collation_attributes()
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+RESET client_min_messages;
 SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
  ?column? | ?column? 
 ----------+----------
  t        | t
 (1 row)
 
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+RESET client_min_messages;
 SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
  ?column? | ?column? 
 ----------+----------
@@ -1232,7 +1239,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll
 (1 row)
 
 CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
+NOTICE:  using language tag "und-u-kf-lower" for locale "@colCaseFirst=lower"
 CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper');
+NOTICE:  using language tag "und-u-kf-upper" for locale "@colCaseFirst=upper"
 SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcoll_upper_first;
  ?column? | ?column? 
 ----------+----------
@@ -1240,13 +1249,16 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
 (1 row)
 
 CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
+NOTICE:  using language tag "und-u-ka-shifted" for locale "@colAlternate=shifted"
 SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
  ?column? | ?column? 
 ----------+----------
  t        | t
 (1 row)
 
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+RESET client_min_messages;
 SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
  ?column? | ?column? 
 ----------+----------
@@ -1258,6 +1270,7 @@ ERROR:  could not open collator for locale "@colNumeric=lower": U_ILLEGAL_ARGUME
 -- test that attributes not handled by icu_set_collation_attributes()
 -- (handled by ucol_open() directly) also work
 CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook');
+NOTICE:  using language tag "de-u-co-phonebk" for locale "de@collation=phonebook"
 SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook;
  ?column? | ?column? 
 ----------+----------
@@ -1266,6 +1279,7 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes
 
 -- rules
 CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g');
+NOTICE:  using language tag "und" for locale ""
 CREATE TABLE test7 (a text);
 -- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax
 INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green');
@@ -1293,10 +1307,13 @@ SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1;
 
 DROP TABLE test7;
 CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!');
-ERROR:  could not open collator for locale "" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
+NOTICE:  using language tag "und" for locale ""
+ERROR:  could not open collator for locale "und" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR
 -- nondeterministic collations
 CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true);
+NOTICE:  using language tag "und" for locale ""
 CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false);
+NOTICE:  using language tag "und" for locale ""
 CREATE TABLE test6 (a int, b text);
 -- same string in different normal forms
 INSERT INTO test6 VALUES (1, U&'\00E4bc');
@@ -1346,7 +1363,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet;
 (2 rows)
 
 CREATE COLLATION case_sensitive (provider = icu, locale = '');
+NOTICE:  using language tag "und" for locale ""
 CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false);
+NOTICE:  using language tag "und-u-ks-level2" for locale "@colStrength=secondary"
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
  ?column? | ?column? 
 ----------+----------
@@ -1361,6 +1380,7 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
 
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
+NOTICE:  using language tag "en-u-ks-level1" for locale "en-u-ks-level1"
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
  ?column? 
 ----------
@@ -1368,6 +1388,7 @@ SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
 (1 row)
 
 CREATE COLLATION lt_upperfirst (provider = icu, locale = 'und-u-kf-upper');
+NOTICE:  using language tag "und-u-kf-upper" for locale "und-u-kf-upper"
 SELECT 'Z' COLLATE lt_upperfirst < 'z' COLLATE lt_upperfirst;
  ?column? 
 ----------
@@ -1828,7 +1849,9 @@ SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
 (2 rows)
 
 -- accents
+SET client_min_messages=WARNING;
 CREATE COLLATION ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes', deterministic = false);
+RESET client_min_messages;
 CREATE TABLE test4 (a int, b text);
 INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
 SELECT * FROM test4 WHERE b = 'cote';
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 1cd823ef37..ddd1ab7e66 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -367,6 +367,8 @@ CREATE ROLE regress_test_role;
 CREATE SCHEMA test_schema;
 
 -- We need to do this this way to cope with varying names for encodings:
+SET client_min_messages TO WARNING;
+
 do $$
 BEGIN
   EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -380,6 +382,9 @@ BEGIN
           quote_literal(current_setting('lc_collate')) || ');';
 END
 $$;
+
+RESET client_min_messages;
+
 CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
 CREATE COLLATION testx (provider = icu, locale = 'nonsense'); /* never fails with ICU */  DROP COLLATION testx;
 
@@ -464,10 +469,14 @@ SELECT * FROM collate_test2 ORDER BY b COLLATE UNICODE;
 
 -- test the attributes handled by icu_set_collation_attributes()
 
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes');
+RESET client_min_messages;
 SELECT 'aaá' > 'AAA' COLLATE "und-x-icu", 'aaá' < 'AAA' COLLATE testcoll_ignore_accents;
 
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_backwards (provider = icu, locale = '@colBackwards=yes');
+RESET client_min_messages;
 SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll_backwards;
 
 CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower');
@@ -477,7 +486,9 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol
 CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted');
 SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted;
 
+SET client_min_messages=WARNING;
 CREATE COLLATION testcoll_numeric (provider = icu, locale = '@colNumeric=yes');
+RESET client_min_messages;
 SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_numeric;
 
 CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower');
@@ -666,7 +677,9 @@ INSERT INTO inner_text VALUES ('a', NULL);
 SELECT * FROM outer_text WHERE (f1, f2) NOT IN (SELECT * FROM inner_text);
 
 -- accents
+SET client_min_messages=WARNING;
 CREATE COLLATION ignore_accents (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes', deterministic = false);
+RESET client_min_messages;
 
 CREATE TABLE test4 (a int, b text);
 INSERT INTO test4 VALUES (1, 'cote'), (2, 'côte'), (3, 'coté'), (4, 'côté');
-- 
2.34.1

Reply via email to