On Thu, 2024-06-06 at 11:37 -0700, Jeff Davis wrote:
> 
> I think this patch series is a nice cleanup, as well, making libc
> more
> like the other providers and not dependent on global state.

New rebased series attached with additional cleanup. Now that
pg_locale_t is never NULL, we can simplify the way the collation cache
works, eliminating ~100 lines.

-- 
Jeff Davis
PostgreSQL Contributor Team - AWS


From d3862b88d8df3372ebdd368489a86142bd11f42c Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 11:45:55 -0700
Subject: [PATCH v2 1/7] Make database default collation internal to
 pg_locale.c.

---
 src/backend/utils/adt/pg_locale.c | 64 ++++++++++++++++++++++++++++++-
 src/backend/utils/init/postinit.c | 35 ++---------------
 src/include/utils/pg_locale.h     |  3 +-
 3 files changed, 66 insertions(+), 36 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 7e5bb2b703..29f16c49cb 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -56,6 +56,7 @@
 
 #include "access/htup_details.h"
 #include "catalog/pg_collation.h"
+#include "catalog/pg_database.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
@@ -116,6 +117,8 @@ char	   *localized_full_months[12 + 1];
 /* is the databases's LC_CTYPE the C locale? */
 bool		database_ctype_is_c = false;
 
+static struct pg_locale_struct default_locale;
+
 /* indicates whether locale information cache is valid */
 static bool CurrentLocaleConvValid = false;
 static bool CurrentLCTimeValid = false;
@@ -1443,8 +1446,6 @@ lc_ctype_is_c(Oid collation)
 	return (lookup_collation_cache(collation, true))->ctype_is_c;
 }
 
-struct pg_locale_struct default_locale;
-
 void
 make_icu_collator(const char *iculocstr,
 				  const char *icurules,
@@ -1537,6 +1538,65 @@ pg_locale_deterministic(pg_locale_t locale)
 		return locale->deterministic;
 }
 
+void
+pg_init_database_collation()
+{
+	HeapTuple	tup;
+	Form_pg_database dbform;
+	Datum		datum;
+	bool		isnull;
+
+	/* Fetch our pg_database row normally, via syscache */
+	tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
+	if (!HeapTupleIsValid(tup))
+		elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
+	dbform = (Form_pg_database) GETSTRUCT(tup);
+
+	if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
+	{
+		char	*datlocale;
+
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+		datlocale = TextDatumGetCString(datum);
+
+		builtin_validate_locale(dbform->encoding, datlocale);
+
+		default_locale.info.builtin.locale = MemoryContextStrdup(
+																 TopMemoryContext, datlocale);
+	}
+	else if (dbform->datlocprovider == COLLPROVIDER_ICU)
+	{
+		char	*datlocale;
+		char	*icurules;
+
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+		datlocale = TextDatumGetCString(datum);
+
+		datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
+		if (!isnull)
+			icurules = TextDatumGetCString(datum);
+		else
+			icurules = NULL;
+
+		make_icu_collator(datlocale, icurules, &default_locale);
+	}
+	else
+	{
+		Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
+	}
+
+	default_locale.provider = dbform->datlocprovider;
+
+	/*
+	 * Default locale is currently always deterministic.  Nondeterministic
+	 * locales currently don't support pattern matching, which would break a
+	 * lot of things if applied globally.
+	 */
+	default_locale.deterministic = true;
+
+	ReleaseSysCache(tup);
+}
+
 /*
  * Create a locale_t from a collation OID.  Results are cached for the
  * lifetime of the backend.  Thus, do not free the result with freelocale().
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 0805398e24..6347efdd5a 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -423,43 +423,14 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 		strcmp(ctype, "POSIX") == 0)
 		database_ctype_is_c = true;
 
-	if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
-	{
-		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
-		datlocale = TextDatumGetCString(datum);
-
-		builtin_validate_locale(dbform->encoding, datlocale);
-
-		default_locale.info.builtin.locale = MemoryContextStrdup(
-																 TopMemoryContext, datlocale);
-	}
-	else if (dbform->datlocprovider == COLLPROVIDER_ICU)
-	{
-		char	   *icurules;
+	pg_init_database_collation();
 
-		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+	datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datlocale, &isnull);
+	if (!isnull)
 		datlocale = TextDatumGetCString(datum);
-
-		datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
-		if (!isnull)
-			icurules = TextDatumGetCString(datum);
-		else
-			icurules = NULL;
-
-		make_icu_collator(datlocale, icurules, &default_locale);
-	}
 	else
 		datlocale = NULL;
 
-	default_locale.provider = dbform->datlocprovider;
-
-	/*
-	 * Default locale is currently always deterministic.  Nondeterministic
-	 * locales currently don't support pattern matching, which would break a
-	 * lot of things if applied globally.
-	 */
-	default_locale.deterministic = true;
-
 	/*
 	 * Check collation version.  See similar code in
 	 * pg_newlocale_from_collation().  Note that here we warn instead of error
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 040968d6ff..3a9026e7b7 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -93,13 +93,12 @@ struct pg_locale_struct
 
 typedef struct pg_locale_struct *pg_locale_t;
 
-extern PGDLLIMPORT struct pg_locale_struct default_locale;
-
 extern void make_icu_collator(const char *iculocstr,
 							  const char *icurules,
 							  struct pg_locale_struct *resultp);
 
 extern bool pg_locale_deterministic(pg_locale_t locale);
+extern void pg_init_database_collation(void);
 extern pg_locale_t pg_newlocale_from_collation(Oid collid);
 
 extern char *get_collation_actual_version(char collprovider, const char *collcollate);
-- 
2.34.1

From 6bce19a5ce602f718eb29c6d8c24adc4b648c2ee Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 15:02:26 -0700
Subject: [PATCH v2 2/7] Make database collation pg_locale_t always non-NULL.

Previously, the database collation's pg_locale_t was NULL for the libc
provider.

This commit properly initializes a pg_locale_t object in all cases.
---
 src/backend/utils/adt/pg_locale.c | 58 +++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 29f16c49cb..185b860dad 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1582,7 +1582,58 @@ pg_init_database_collation()
 	}
 	else
 	{
+		const char *datcollate;
+		const char *datctype pg_attribute_unused();
+		locale_t	loc;
+
 		Assert(dbform->datlocprovider == COLLPROVIDER_LIBC);
+
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate);
+		datcollate = TextDatumGetCString(datum);
+		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
+		datctype = TextDatumGetCString(datum);
+
+		if (strcmp(datcollate, datctype) == 0)
+		{
+			/* Normal case where they're the same */
+			errno = 0;
+#ifndef WIN32
+			loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, datcollate,
+							NULL);
+#else
+			loc = _create_locale(LC_ALL, datcollate);
+#endif
+			if (!loc)
+				report_newlocale_failure(datcollate);
+		}
+		else
+		{
+#ifndef WIN32
+			/* We need two newlocale() steps */
+			locale_t	loc1;
+
+			errno = 0;
+			loc1 = newlocale(LC_COLLATE_MASK, datcollate, NULL);
+			if (!loc1)
+				report_newlocale_failure(datcollate);
+			errno = 0;
+			loc = newlocale(LC_CTYPE_MASK, datctype, loc1);
+			if (!loc)
+				report_newlocale_failure(datctype);
+#else
+
+			/*
+			 * XXX The _create_locale() API doesn't appear to support
+			 * this. Could perhaps be worked around by changing
+			 * pg_locale_t to contain two separate fields.
+			 */
+			ereport(ERROR,
+					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					 errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+		}
+
+		default_locale.info.lt = loc;
 	}
 
 	default_locale.provider = dbform->datlocprovider;
@@ -1616,12 +1667,7 @@ pg_newlocale_from_collation(Oid collid)
 	Assert(OidIsValid(collid));
 
 	if (collid == DEFAULT_COLLATION_OID)
-	{
-		if (default_locale.provider == COLLPROVIDER_LIBC)
-			return (pg_locale_t) 0;
-		else
-			return &default_locale;
-	}
+		return &default_locale;
 
 	cache_entry = lookup_collation_cache(collid, false);
 
-- 
2.34.1

From 53ec3ac3d061a5397657c943569593e98150b3a4 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 14:48:07 -0700
Subject: [PATCH v2 3/7] ts_locale.c: do not use NULL to mean the database
 collation.

Use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) to explicitly
get the database collation.
---
 src/backend/tsearch/ts_locale.c   | 13 +++++++------
 src/backend/tsearch/wparser_def.c |  3 ++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index bc44599de6..86c3d6e790 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -13,6 +13,7 @@
  */
 #include "postgres.h"
 
+#include "catalog/pg_collation.h"
 #include "common/string.h"
 #include "storage/fd.h"
 #include "tsearch/ts_locale.h"
@@ -36,7 +37,7 @@ t_isdigit(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 	if (clen == 1 || database_ctype_is_c)
 		return isdigit(TOUCHAR(ptr));
@@ -51,7 +52,7 @@ t_isspace(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 	if (clen == 1 || database_ctype_is_c)
 		return isspace(TOUCHAR(ptr));
@@ -66,7 +67,7 @@ t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 	if (clen == 1 || database_ctype_is_c)
 		return isalpha(TOUCHAR(ptr));
@@ -81,7 +82,7 @@ t_isalnum(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 	if (clen == 1 || database_ctype_is_c)
 		return isalnum(TOUCHAR(ptr));
@@ -96,7 +97,7 @@ t_isprint(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);;
 
 	if (clen == 1 || database_ctype_is_c)
 		return isprint(TOUCHAR(ptr));
@@ -266,7 +267,7 @@ char *
 lowerstr_with_len(const char *str, int len)
 {
 	char	   *out;
-	pg_locale_t mylocale = 0;	/* TODO */
+	pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 	if (len == 0)
 		return pstrdup("");
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 3919ef27b5..a333d46802 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -17,6 +17,7 @@
 #include <limits.h>
 #include <wctype.h>
 
+#include "catalog/pg_collation.h"
 #include "commands/defrem.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -299,7 +300,7 @@ TParserInit(char *str, int len)
 	 */
 	if (prs->charmaxlen > 1)
 	{
-		pg_locale_t mylocale = 0;	/* TODO */
+		pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);
 
 		prs->usewide = true;
 		if (database_ctype_is_c)
-- 
2.34.1

From 8d267620c1b0551e81e6ecc6dea6d58cd05df0c2 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 11:58:59 -0700
Subject: [PATCH v2 4/7] Remove support for null pg_locale_t.

Previously, passing NULL for pg_locale_t meant "use the libc provider
and the server environment". Now that the database collation is
represented as a proper pg_locale_t (not dependent on setlocale()),
remove special cases for NULL.
---
 src/backend/access/hash/hashfunc.c |  4 +--
 src/backend/utils/adt/like.c       |  2 +-
 src/backend/utils/adt/pg_locale.c  | 57 +++++++++++-------------------
 src/backend/utils/adt/varchar.c    |  4 +--
 src/backend/utils/adt/varlena.c    |  4 +--
 5 files changed, 27 insertions(+), 44 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index ce8ee0ea2e..ec2133d4e1 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -280,7 +280,7 @@ hashtext(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (!mylocale || pg_locale_deterministic(mylocale))
 	{
 		result = hash_any((unsigned char *) VARDATA_ANY(key),
 						  VARSIZE_ANY_EXHDR(key));
@@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (!mylocale || pg_locale_deterministic(mylocale))
 	{
 		result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
 								   VARSIZE_ANY_EXHDR(key),
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 57ead66b5a..0807b89b17 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -194,7 +194,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	else
 		locale = pg_newlocale_from_collation(collation);
 
-	if (!pg_locale_deterministic(locale))
+	if (!locale_is_c && !pg_locale_deterministic(locale))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for ILIKE")));
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 185b860dad..2ffc551913 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1531,11 +1531,7 @@ report_newlocale_failure(const char *localename)
 bool
 pg_locale_deterministic(pg_locale_t locale)
 {
-	/* default locale must always be deterministic */
-	if (locale == NULL)
-		return true;
-	else
-		return locale->deterministic;
+	return locale->deterministic;
 }
 
 void
@@ -1954,7 +1950,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
 	int			r;
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 #ifndef WIN32
 	Assert(false);
@@ -1994,10 +1990,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
 	((LPWSTR) a2p)[r] = 0;
 
 	errno = 0;
-	if (locale)
-		result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
-	else
-		result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
 	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
 		ereport(ERROR,
 				(errmsg("could not compare Unicode strings: %m")));
@@ -2023,7 +2016,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 {
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 #ifdef WIN32
 	if (GetDatabaseEncoding() == PG_UTF8)
 	{
@@ -2034,10 +2027,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 	}
 	else
 #endif							/* WIN32 */
-	if (locale)
 		result = strcoll_l(arg1, arg2, locale->info.lt);
-	else
-		result = strcoll(arg1, arg2);
 
 	return result;
 }
@@ -2059,7 +2049,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
 	char	   *arg2n;
 	int			result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 #ifdef WIN32
 	/* check for this case before doing the work for nul-termination */
@@ -2205,7 +2195,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
 {
 	int			result;
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strcoll_libc(arg1, arg2, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2241,7 +2231,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
 {
 	int			result;
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2259,13 +2249,10 @@ static size_t
 pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
 				pg_locale_t locale)
 {
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 #ifdef TRUST_STRXFRM
-	if (locale)
-		return strxfrm_l(dest, src, destsize, locale->info.lt);
-	else
-		return strxfrm(dest, src, destsize);
+	return strxfrm_l(dest, src, destsize, locale->info.lt);
 #else
 	/* shouldn't happen */
 	PGLOCALE_SUPPORT_ERROR(locale->provider);
@@ -2282,7 +2269,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
 	size_t		bufsize = srclen + 1;
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (bufsize > TEXTBUFLEN)
 		buf = palloc(bufsize);
@@ -2454,7 +2441,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
 bool
 pg_strxfrm_enabled(pg_locale_t locale)
 {
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 #ifdef TRUST_STRXFRM
 		return true;
 #else
@@ -2488,7 +2475,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strxfrm_libc(dest, src, destsize, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2525,7 +2512,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
 #ifdef USE_ICU
 	else if (locale->provider == COLLPROVIDER_ICU)
@@ -2545,7 +2532,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
 bool
 pg_strxfrm_prefix_enabled(pg_locale_t locale)
 {
-	if (!locale || locale->provider == COLLPROVIDER_LIBC)
+	if (locale->provider == COLLPROVIDER_LIBC)
 		return false;
 	else if (locale->provider == COLLPROVIDER_ICU)
 		return true;
@@ -2575,13 +2562,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale)
-		PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
 #ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
+	if (locale->provider == COLLPROVIDER_ICU)
 		result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
 	else
+#endif
 		PGLOCALE_SUPPORT_ERROR(locale->provider);
 
 	return result;
@@ -2610,13 +2595,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 {
 	size_t		result = 0;		/* keep compiler quiet */
 
-	if (!locale)
-		PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
 #ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
+	if (locale->provider == COLLPROVIDER_ICU)
 		result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
 	else
+#endif
 		PGLOCALE_SUPPORT_ERROR(locale->provider);
 
 	return result;
@@ -3166,7 +3149,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 {
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (tolen == 0)
 		return 0;
@@ -3223,7 +3206,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 {
 	size_t		result;
 
-	Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 	if (tolen == 0)
 		return 0;
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 02dfe219f5..60b1372227 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1014,7 +1014,7 @@ hashbpchar(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (!mylocale || pg_locale_deterministic(mylocale))
 	{
 		result = hash_any((unsigned char *) keydata, keylen);
 	}
@@ -1069,7 +1069,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (pg_locale_deterministic(mylocale))
+	if (!mylocale || pg_locale_deterministic(mylocale))
 	{
 		result = hash_any_extended((unsigned char *) keydata, keylen,
 								   PG_GETARG_INT64(1));
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba..9abae63221 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1224,7 +1224,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (!pg_locale_deterministic(mylocale))
+	if (mylocale && !pg_locale_deterministic(mylocale))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for substring searches")));
@@ -1803,7 +1803,7 @@ text_starts_with(PG_FUNCTION_ARGS)
 	if (!lc_collate_is_c(collid))
 		mylocale = pg_newlocale_from_collation(collid);
 
-	if (!pg_locale_deterministic(mylocale))
+	if (mylocale && !pg_locale_deterministic(mylocale))
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("nondeterministic collations are not supported for substring searches")));
-- 
2.34.1

From 3b208a189ecc0d6942c357f8b616ad0aa9ee5848 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Wed, 5 Jun 2024 15:22:04 -0700
Subject: [PATCH v2 5/7] Avoid setlocale() in lc_collate_is_c() and
 lc_ctype_is_c().

Store the collate and ctype strings in the pg_locale_t structure,
which requires some minor refactoring.
---
 src/backend/regex/regc_pg_locale.c   | 44 ++++++++++++++--------------
 src/backend/utils/adt/formatting.c   | 20 ++++++-------
 src/backend/utils/adt/like.c         |  2 +-
 src/backend/utils/adt/like_support.c |  2 +-
 src/backend/utils/adt/pg_locale.c    | 30 +++++++++++--------
 src/include/utils/pg_locale.h        |  7 ++++-
 6 files changed, 57 insertions(+), 48 deletions(-)

diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 85f3238eb0..1e180b844f 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -309,11 +309,11 @@ pg_wc_isdigit(pg_wchar c)
 					isdigit((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswdigit_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
+					isdigit_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -343,11 +343,11 @@ pg_wc_isalpha(pg_wchar c)
 					isalpha((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswalpha_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
+					isalpha_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -377,11 +377,11 @@ pg_wc_isalnum(pg_wchar c)
 					isalnum((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswalnum_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
+					isalnum_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -420,11 +420,11 @@ pg_wc_isupper(pg_wchar c)
 					isupper((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswupper_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper_l((unsigned char) c, pg_regex_locale->info.lt));
+					isupper_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -454,11 +454,11 @@ pg_wc_islower(pg_wchar c)
 					islower((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswlower_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower_l((unsigned char) c, pg_regex_locale->info.lt));
+					islower_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -488,11 +488,11 @@ pg_wc_isgraph(pg_wchar c)
 					isgraph((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswgraph_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
+					isgraph_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -522,11 +522,11 @@ pg_wc_isprint(pg_wchar c)
 					isprint((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswprint_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint_l((unsigned char) c, pg_regex_locale->info.lt));
+					isprint_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -556,11 +556,11 @@ pg_wc_ispunct(pg_wchar c)
 					ispunct((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswpunct_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
+					ispunct_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -590,11 +590,11 @@ pg_wc_isspace(pg_wchar c)
 					isspace((unsigned char) c));
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
+				return iswspace_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace_l((unsigned char) c, pg_regex_locale->info.lt));
+					isspace_l((unsigned char) c, pg_regex_locale->info.libc.lt));
 			break;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -632,11 +632,11 @@ pg_wc_toupper(pg_wchar c)
 			return c;
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
+				return towupper_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
+				return toupper_l((unsigned char) c, pg_regex_locale->info.libc.lt);
 			return c;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
@@ -674,11 +674,11 @@ pg_wc_tolower(pg_wchar c)
 			return c;
 		case PG_REGEX_LOCALE_WIDE_L:
 			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
+				return towlower_l((wint_t) c, pg_regex_locale->info.libc.lt);
 			/* FALL THRU */
 		case PG_REGEX_LOCALE_1BYTE_L:
 			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
+				return tolower_l((unsigned char) c, pg_regex_locale->info.libc.lt);
 			return c;
 		case PG_REGEX_LOCALE_ICU:
 #ifdef USE_ICU
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 8736ada4be..2c3a28ca25 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1732,7 +1732,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 				{
 					if (mylocale)
-						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+						workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt);
 					else
 						workspace[curr_char] = towlower(workspace[curr_char]);
 				}
@@ -1763,7 +1763,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 				for (p = result; *p; p++)
 				{
 					if (mylocale)
-						*p = tolower_l((unsigned char) *p, mylocale->info.lt);
+						*p = tolower_l((unsigned char) *p, mylocale->info.libc.lt);
 					else
 						*p = pg_tolower((unsigned char) *p);
 				}
@@ -1880,7 +1880,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 				{
 					if (mylocale)
-						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+						workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt);
 					else
 						workspace[curr_char] = towupper(workspace[curr_char]);
 				}
@@ -1911,7 +1911,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 				for (p = result; *p; p++)
 				{
 					if (mylocale)
-						*p = toupper_l((unsigned char) *p, mylocale->info.lt);
+						*p = toupper_l((unsigned char) *p, mylocale->info.libc.lt);
 					else
 						*p = pg_toupper((unsigned char) *p);
 				}
@@ -2084,10 +2084,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 					if (mylocale)
 					{
 						if (wasalnum)
-							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+							workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt);
 						else
-							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
-						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+							workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt);
+						wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.libc.lt);
 					}
 					else
 					{
@@ -2127,10 +2127,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 					if (mylocale)
 					{
 						if (wasalnum)
-							*p = tolower_l((unsigned char) *p, mylocale->info.lt);
+							*p = tolower_l((unsigned char) *p, mylocale->info.libc.lt);
 						else
-							*p = toupper_l((unsigned char) *p, mylocale->info.lt);
-						wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
+							*p = toupper_l((unsigned char) *p, mylocale->info.libc.lt);
+						wasalnum = isalnum_l((unsigned char) *p, mylocale->info.libc.lt);
 					}
 					else
 					{
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 0807b89b17..4d9a207816 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
 	if (locale_is_c)
 		return pg_ascii_tolower(c);
 	else if (locale)
-		return tolower_l(c, locale->info.lt);
+		return tolower_l(c, locale->info.libc.lt);
 	else
 		return pg_tolower(c);
 }
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 2635050861..6a1c64176d 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1509,7 +1509,7 @@ pattern_char_isalpha(char c, bool is_multibyte,
 		return IS_HIGHBIT_SET(c) ||
 			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 	else if (locale && locale->provider == COLLPROVIDER_LIBC)
-		return isalpha_l((unsigned char) c, locale->info.lt);
+		return isalpha_l((unsigned char) c, locale->info.libc.lt);
 	else
 		return isalpha((unsigned char) c);
 }
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 2ffc551913..060dcbcb14 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1350,9 +1350,7 @@ lc_collate_is_c(Oid collation)
 		}
 		else if (default_locale.provider == COLLPROVIDER_LIBC)
 		{
-			localeptr = setlocale(LC_CTYPE, NULL);
-			if (!localeptr)
-				elog(ERROR, "invalid LC_CTYPE setting");
+			localeptr = default_locale.info.libc.collate;
 		}
 		else
 			elog(ERROR, "unexpected collation provider '%c'",
@@ -1416,9 +1414,7 @@ lc_ctype_is_c(Oid collation)
 		}
 		else if (default_locale.provider == COLLPROVIDER_LIBC)
 		{
-			localeptr = setlocale(LC_CTYPE, NULL);
-			if (!localeptr)
-				elog(ERROR, "invalid LC_CTYPE setting");
+			localeptr = default_locale.info.libc.ctype;
 		}
 		else
 			elog(ERROR, "unexpected collation provider '%c'",
@@ -1629,7 +1625,11 @@ pg_init_database_collation()
 #endif
 		}
 
-		default_locale.info.lt = loc;
+		default_locale.info.libc.collate = MemoryContextStrdup(
+			TopMemoryContext, datcollate);
+		default_locale.info.libc.ctype = MemoryContextStrdup(
+			TopMemoryContext, datctype);
+		default_locale.info.libc.lt = loc;
 	}
 
 	default_locale.provider = dbform->datlocprovider;
@@ -1750,7 +1750,11 @@ pg_newlocale_from_collation(Oid collid)
 #endif
 			}
 
-			result.info.lt = loc;
+			result.info.libc.collate = MemoryContextStrdup(
+				TopMemoryContext, collcollate);
+			result.info.libc.ctype = MemoryContextStrdup(
+				TopMemoryContext, collctype);
+			result.info.libc.lt = loc;
 		}
 		else if (collform->collprovider == COLLPROVIDER_ICU)
 		{
@@ -1990,7 +1994,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
 	((LPWSTR) a2p)[r] = 0;
 
 	errno = 0;
-	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
+	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.libc.lt);
 	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
 		ereport(ERROR,
 				(errmsg("could not compare Unicode strings: %m")));
@@ -2027,7 +2031,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
 	}
 	else
 #endif							/* WIN32 */
-		result = strcoll_l(arg1, arg2, locale->info.lt);
+		result = strcoll_l(arg1, arg2, locale->info.libc.lt);
 
 	return result;
 }
@@ -2252,7 +2256,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
 	Assert(locale->provider == COLLPROVIDER_LIBC);
 
 #ifdef TRUST_STRXFRM
-	return strxfrm_l(dest, src, destsize, locale->info.lt);
+	return strxfrm_l(dest, src, destsize, locale->info.libc.lt);
 #else
 	/* shouldn't happen */
 	PGLOCALE_SUPPORT_ERROR(locale->provider);
@@ -3185,7 +3189,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 	else
 	{
 		/* Use wcstombs_l for nondefault locales */
-		result = wcstombs_l(to, from, tolen, locale->info.lt);
+		result = wcstombs_l(to, from, tolen, locale->info.libc.lt);
 	}
 
 	return result;
@@ -3247,7 +3251,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 		else
 		{
 			/* Use mbstowcs_l for nondefault locales */
-			result = mbstowcs_l(to, str, tolen, locale->info.lt);
+			result = mbstowcs_l(to, str, tolen, locale->info.libc.lt);
 		}
 
 		pfree(str);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 3a9026e7b7..63b724dc1d 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -80,7 +80,12 @@ struct pg_locale_struct
 		{
 			const char *locale;
 		}			builtin;
-		locale_t	lt;
+		struct
+		{
+			const char *collate;
+			const char *ctype;
+			locale_t	lt;
+		}			libc;
 #ifdef USE_ICU
 		struct
 		{
-- 
2.34.1

From f8532d5d287ff190917f4ac8bdc3de85ae697e8e Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 14 Jun 2024 15:13:59 -0700
Subject: [PATCH v2 6/7] Simplify collation cache.

Now that the result of pg_newlocale_from_collation() is always
non-NULL, move the collate_is_c and ctype_is_c flags into pg_locale_t,
and always use that. This commit eliminates the multi-stage
initialization of the cache and the extra code in lc_collate_is_c()
and lc_ctype_is_c().
---
 src/backend/utils/adt/pg_locale.c | 160 +++++-------------------------
 src/include/utils/pg_locale.h     |   2 +
 2 files changed, 28 insertions(+), 134 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 060dcbcb14..435a37a0e3 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
 typedef struct
 {
 	Oid			collid;			/* hash key: pg_collation OID */
-	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
-	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
-	bool		flags_valid;	/* true if above flags are valid */
 	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
 } collation_cache_entry;
 
@@ -1230,7 +1227,7 @@ IsoLocaleName(const char *winlocname)
  */
 
 static collation_cache_entry *
-lookup_collation_cache(Oid collation, bool set_flags)
+lookup_collation_cache(Oid collation)
 {
 	collation_cache_entry *cache_entry;
 	bool		found;
@@ -1256,59 +1253,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
 		 * Make sure cache entry is marked invalid, in case we fail before
 		 * setting things.
 		 */
-		cache_entry->flags_valid = false;
 		cache_entry->locale = 0;
 	}
 
-	if (set_flags && !cache_entry->flags_valid)
-	{
-		/* Attempt to set the flags */
-		HeapTuple	tp;
-		Form_pg_collation collform;
-
-		tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
-		if (!HeapTupleIsValid(tp))
-			elog(ERROR, "cache lookup failed for collation %u", collation);
-		collform = (Form_pg_collation) GETSTRUCT(tp);
-
-		if (collform->collprovider == COLLPROVIDER_BUILTIN)
-		{
-			Datum		datum;
-			const char *colllocale;
-
-			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
-			colllocale = TextDatumGetCString(datum);
-
-			cache_entry->collate_is_c = true;
-			cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
-		}
-		else if (collform->collprovider == COLLPROVIDER_LIBC)
-		{
-			Datum		datum;
-			const char *collcollate;
-			const char *collctype;
-
-			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
-			collcollate = TextDatumGetCString(datum);
-			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
-			collctype = TextDatumGetCString(datum);
-
-			cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
-										 (strcmp(collcollate, "POSIX") == 0));
-			cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
-									   (strcmp(collctype, "POSIX") == 0));
-		}
-		else
-		{
-			cache_entry->collate_is_c = false;
-			cache_entry->ctype_is_c = false;
-		}
-
-		cache_entry->flags_valid = true;
-
-		ReleaseSysCache(tp);
-	}
-
 	return cache_entry;
 }
 
@@ -1326,45 +1273,6 @@ lc_collate_is_c(Oid collation)
 	if (!OidIsValid(collation))
 		return false;
 
-	/*
-	 * If we're asked about the default collation, we have to inquire of the C
-	 * library.  Cache the result so we only have to compute it once.
-	 */
-	if (collation == DEFAULT_COLLATION_OID)
-	{
-		static int	result = -1;
-		const char *localeptr;
-
-		if (result >= 0)
-			return (bool) result;
-
-		if (default_locale.provider == COLLPROVIDER_BUILTIN)
-		{
-			result = true;
-			return (bool) result;
-		}
-		else if (default_locale.provider == COLLPROVIDER_ICU)
-		{
-			result = false;
-			return (bool) result;
-		}
-		else if (default_locale.provider == COLLPROVIDER_LIBC)
-		{
-			localeptr = default_locale.info.libc.collate;
-		}
-		else
-			elog(ERROR, "unexpected collation provider '%c'",
-				 default_locale.provider);
-
-		if (strcmp(localeptr, "C") == 0)
-			result = true;
-		else if (strcmp(localeptr, "POSIX") == 0)
-			result = true;
-		else
-			result = false;
-		return (bool) result;
-	}
-
 	/*
 	 * If we're asked about the built-in C/POSIX collations, we know that.
 	 */
@@ -1375,7 +1283,7 @@ lc_collate_is_c(Oid collation)
 	/*
 	 * Otherwise, we have to consult pg_collation, but we cache that.
 	 */
-	return (lookup_collation_cache(collation, true))->collate_is_c;
+	return pg_newlocale_from_collation(collation)->collate_is_c;
 }
 
 /*
@@ -1391,44 +1299,6 @@ lc_ctype_is_c(Oid collation)
 	if (!OidIsValid(collation))
 		return false;
 
-	/*
-	 * If we're asked about the default collation, we have to inquire of the C
-	 * library.  Cache the result so we only have to compute it once.
-	 */
-	if (collation == DEFAULT_COLLATION_OID)
-	{
-		static int	result = -1;
-		const char *localeptr;
-
-		if (result >= 0)
-			return (bool) result;
-
-		if (default_locale.provider == COLLPROVIDER_BUILTIN)
-		{
-			localeptr = default_locale.info.builtin.locale;
-		}
-		else if (default_locale.provider == COLLPROVIDER_ICU)
-		{
-			result = false;
-			return (bool) result;
-		}
-		else if (default_locale.provider == COLLPROVIDER_LIBC)
-		{
-			localeptr = default_locale.info.libc.ctype;
-		}
-		else
-			elog(ERROR, "unexpected collation provider '%c'",
-				 default_locale.provider);
-
-		if (strcmp(localeptr, "C") == 0)
-			result = true;
-		else if (strcmp(localeptr, "POSIX") == 0)
-			result = true;
-		else
-			result = false;
-		return (bool) result;
-	}
-
 	/*
 	 * If we're asked about the built-in C/POSIX collations, we know that.
 	 */
@@ -1439,7 +1309,7 @@ lc_ctype_is_c(Oid collation)
 	/*
 	 * Otherwise, we have to consult pg_collation, but we cache that.
 	 */
-	return (lookup_collation_cache(collation, true))->ctype_is_c;
+	return pg_newlocale_from_collation(collation)->ctype_is_c;
 }
 
 void
@@ -1553,6 +1423,9 @@ pg_init_database_collation()
 
 		builtin_validate_locale(dbform->encoding, datlocale);
 
+		default_locale.collate_is_c = true;
+		default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
+
 		default_locale.info.builtin.locale = MemoryContextStrdup(
 																 TopMemoryContext, datlocale);
 	}
@@ -1564,6 +1437,9 @@ pg_init_database_collation()
 		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
 		datlocale = TextDatumGetCString(datum);
 
+		default_locale.collate_is_c = false;
+		default_locale.ctype_is_c = false;
+
 		datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
 		if (!isnull)
 			icurules = TextDatumGetCString(datum);
@@ -1585,6 +1461,11 @@ pg_init_database_collation()
 		datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
 		datctype = TextDatumGetCString(datum);
 
+		default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
+			(strcmp(datcollate, "POSIX") == 0);
+		default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
+			(strcmp(datctype, "POSIX") == 0);
+
 		if (strcmp(datcollate, datctype) == 0)
 		{
 			/* Normal case where they're the same */
@@ -1665,7 +1546,7 @@ pg_newlocale_from_collation(Oid collid)
 	if (collid == DEFAULT_COLLATION_OID)
 		return &default_locale;
 
-	cache_entry = lookup_collation_cache(collid, false);
+	cache_entry = lookup_collation_cache(collid);
 
 	if (cache_entry->locale == 0)
 	{
@@ -1694,6 +1575,9 @@ pg_newlocale_from_collation(Oid collid)
 			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
 			locstr = TextDatumGetCString(datum);
 
+			result.collate_is_c = true;
+			result.collate_is_c = (strcmp(locstr, "C") == 0);
+
 			builtin_validate_locale(GetDatabaseEncoding(), locstr);
 
 			result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
@@ -1710,6 +1594,11 @@ pg_newlocale_from_collation(Oid collid)
 			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
 			collctype = TextDatumGetCString(datum);
 
+			result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
+				(strcmp(collcollate, "POSIX") == 0);
+			result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
+				(strcmp(collctype, "POSIX") == 0);
+
 			if (strcmp(collcollate, collctype) == 0)
 			{
 				/* Normal case where they're the same */
@@ -1764,6 +1653,9 @@ pg_newlocale_from_collation(Oid collid)
 			datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
 			iculocstr = TextDatumGetCString(datum);
 
+			result.collate_is_c = false;
+			result.ctype_is_c = false;
+
 			datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
 			if (!isnull)
 				icurules = TextDatumGetCString(datum);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 63b724dc1d..a177b09e09 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -74,6 +74,8 @@ struct pg_locale_struct
 {
 	char		provider;
 	bool		deterministic;
+	bool		collate_is_c;
+	bool		ctype_is_c;
 	union
 	{
 		struct
-- 
2.34.1

Reply via email to