There was an unconference session at pgconf.dev related to threading support. One of the problems identified was setlocale().
The attached series of patches makes collation independent of setlocale(), even if the database collation uses the libc provider. Since commit 8d9a9f034e, all supported platforms have locale_t, so we can use strcoll_l(), etc., or uselocale() when no "_l" variant is available. A brief test shows that there may be a performance regression for libc default collations. If so, I'm not sure that's avoidable, given that the goal is to stop depending on setlocale(). I'll see if removing the extra branches mitigates it. -- Jeff Davis PostgreSQL Contributor Team - AWS
From 9f50c24878740acdd3f1bc036442a0fcc0ea1a5e Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:45:55 -0700 Subject: [PATCH v1 1/5] Make database default collation internal to pg_locale.c. --- src/backend/utils/adt/pg_locale.c | 64 ++++++++++++++++++++++++++++++- src/backend/utils/init/postinit.c | 35 ++--------------- src/include/utils/pg_locale.h | 3 +- 3 files changed, 66 insertions(+), 36 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 7e5bb2b703..29f16c49cb 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -56,6 +56,7 @@ #include "access/htup_details.h" #include "catalog/pg_collation.h" +#include "catalog/pg_database.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" @@ -116,6 +117,8 @@ char *localized_full_months[12 + 1]; /* is the databases's LC_CTYPE the C locale? */ bool database_ctype_is_c = false; +static struct pg_locale_struct default_locale; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; @@ -1443,8 +1446,6 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } -struct pg_locale_struct default_locale; - void make_icu_collator(const char *iculocstr, const char *icurules, @@ -1537,6 +1538,65 @@ pg_locale_deterministic(pg_locale_t locale) return locale->deterministic; } +void +pg_init_database_collation() +{ + HeapTuple tup; + Form_pg_database dbform; + Datum datum; + bool isnull; + + /* Fetch our pg_database row normally, via syscache */ + tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); + dbform = (Form_pg_database) GETSTRUCT(tup); + + if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) + { + char *datlocale; + + datum = 
SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + builtin_validate_locale(dbform->encoding, datlocale); + + default_locale.info.builtin.locale = MemoryContextStrdup( + TopMemoryContext, datlocale); + } + else if (dbform->datlocprovider == COLLPROVIDER_ICU) + { + char *datlocale; + char *icurules; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(datlocale, icurules, &default_locale); + } + else + { + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + } + + default_locale.provider = dbform->datlocprovider; + + /* + * Default locale is currently always deterministic. Nondeterministic + * locales currently don't support pattern matching, which would break a + * lot of things if applied globally. + */ + default_locale.deterministic = true; + + ReleaseSysCache(tup); +} + /* * Create a locale_t from a collation OID. Results are cached for the * lifetime of the backend. Thus, do not free the result with freelocale(). 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 0805398e24..6347efdd5a 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -423,43 +423,14 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect strcmp(ctype, "POSIX") == 0) database_ctype_is_c = true; - if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) - { - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); - datlocale = TextDatumGetCString(datum); - - builtin_validate_locale(dbform->encoding, datlocale); - - default_locale.info.builtin.locale = MemoryContextStrdup( - TopMemoryContext, datlocale); - } - else if (dbform->datlocprovider == COLLPROVIDER_ICU) - { - char *icurules; + pg_init_database_collation(); - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datlocale, &isnull); + if (!isnull) datlocale = TextDatumGetCString(datum); - - datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); - if (!isnull) - icurules = TextDatumGetCString(datum); - else - icurules = NULL; - - make_icu_collator(datlocale, icurules, &default_locale); - } else datlocale = NULL; - default_locale.provider = dbform->datlocprovider; - - /* - * Default locale is currently always deterministic. Nondeterministic - * locales currently don't support pattern matching, which would break a - * lot of things if applied globally. - */ - default_locale.deterministic = true; - /* * Check collation version. See similar code in * pg_newlocale_from_collation(). 
Note that here we warn instead of error diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 205aa20067..1c81a84124 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -93,13 +93,12 @@ struct pg_locale_struct typedef struct pg_locale_struct *pg_locale_t; -extern PGDLLIMPORT struct pg_locale_struct default_locale; - extern void make_icu_collator(const char *iculocstr, const char *icurules, struct pg_locale_struct *resultp); extern bool pg_locale_deterministic(pg_locale_t locale); +extern void pg_init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); -- 2.34.1
From 8673b5559121b861db98d7b8eacc320ff3c15595 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 15:02:26 -0700 Subject: [PATCH v1 2/5] Make database collation pg_locale_t always non-NULL. Previously, the database collation's pg_locale_t was NULL for the libc provider. This commit properly initializes a pg_locale_t object in all cases. --- src/backend/utils/adt/pg_locale.c | 58 +++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 29f16c49cb..185b860dad 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1582,7 +1582,58 @@ pg_init_database_collation() } else { + const char *datcollate; + const char *datctype pg_attribute_unused(); + locale_t loc; + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate); + datcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); + datctype = TextDatumGetCString(datum); + + if (strcmp(datcollate, datctype) == 0) + { + /* Normal case where they're the same */ + errno = 0; +#ifndef WIN32 + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, datcollate, + NULL); +#else + loc = _create_locale(LC_ALL, datcollate); +#endif + if (!loc) + report_newlocale_failure(datcollate); + } + else + { +#ifndef WIN32 + /* We need two newlocale() steps */ + locale_t loc1; + + errno = 0; + loc1 = newlocale(LC_COLLATE_MASK, datcollate, NULL); + if (!loc1) + report_newlocale_failure(datcollate); + errno = 0; + loc = newlocale(LC_CTYPE_MASK, datctype, loc1); + if (!loc) + report_newlocale_failure(datctype); +#else + + /* + * XXX The _create_locale() API doesn't appear to support + * this. Could perhaps be worked around by changing + * pg_locale_t to contain two separate fields. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + default_locale.info.lt = loc; } default_locale.provider = dbform->datlocprovider; @@ -1616,12 +1667,7 @@ pg_newlocale_from_collation(Oid collid) Assert(OidIsValid(collid)); if (collid == DEFAULT_COLLATION_OID) - { - if (default_locale.provider == COLLPROVIDER_LIBC) - return (pg_locale_t) 0; - else - return &default_locale; - } + return &default_locale; cache_entry = lookup_collation_cache(collid, false); -- 2.34.1
From f13f0a08f0285ba6c94742eec4f57d7a1dfc1620 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 14:48:07 -0700 Subject: [PATCH v1 3/5] ts_locale.c: do not use NULL to mean the database collation. Use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) to explicitly get the database collation. --- src/backend/tsearch/ts_locale.c | 13 +++++++------ src/backend/tsearch/wparser_def.c | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index bc44599de6..86c3d6e790 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "catalog/pg_collation.h" #include "common/string.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" @@ -36,7 +37,7 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isdigit(TOUCHAR(ptr)); @@ -51,7 +52,7 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isspace(TOUCHAR(ptr)); @@ -66,7 +67,7 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalpha(TOUCHAR(ptr)); @@ -81,7 +82,7 @@ t_isalnum(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalnum(TOUCHAR(ptr)); @@ -96,7 +97,7 @@ 
t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);; if (clen == 1 || database_ctype_is_c) return isprint(TOUCHAR(ptr)); @@ -266,7 +267,7 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (len == 0) return pstrdup(""); diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 3919ef27b5..a333d46802 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -17,6 +17,7 @@ #include <limits.h> #include <wctype.h> +#include "catalog/pg_collation.h" #include "commands/defrem.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -299,7 +300,7 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); prs->usewide = true; if (database_ctype_is_c) -- 2.34.1
From c29ac64ddd1f33f61c6a284329d29afa0995499a Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:58:59 -0700 Subject: [PATCH v1 4/5] Remove support for null pg_locale_t. Previously, passing NULL for pg_locale_t meant "use the libc provider and the server environment". Now that the database collation is represented as a proper pg_locale_t (not dependent on setlocale()), remove special cases for NULL. --- src/backend/access/hash/hashfunc.c | 4 +-- src/backend/utils/adt/like.c | 2 +- src/backend/utils/adt/pg_locale.c | 57 +++++++++++------------------- src/backend/utils/adt/varchar.c | 4 +-- src/backend/utils/adt/varlena.c | 4 +-- 5 files changed, 27 insertions(+), 44 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index ce8ee0ea2e..ec2133d4e1 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -280,7 +280,7 @@ hashtext(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); @@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 57ead66b5a..0807b89b17 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -194,7 +194,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else locale = pg_newlocale_from_collation(collation); - if (!pg_locale_deterministic(locale)) + if (!locale_is_c && !pg_locale_deterministic(locale)) ereport(ERROR, 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for ILIKE"))); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 185b860dad..2ffc551913 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1531,11 +1531,7 @@ report_newlocale_failure(const char *localename) bool pg_locale_deterministic(pg_locale_t locale) { - /* default locale must always be deterministic */ - if (locale == NULL) - return true; - else - return locale->deterministic; + return locale->deterministic; } void @@ -1954,7 +1950,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, int r; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); #ifndef WIN32 Assert(false); @@ -1994,10 +1990,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - if (locale) - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); - else - result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -2023,7 +2016,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 if (GetDatabaseEncoding() == PG_UTF8) { @@ -2034,10 +2027,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale) result = strcoll_l(arg1, arg2, locale->info.lt); - else - result = strcoll(arg1, arg2); return result; } @@ -2059,7 +2049,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, char *arg2n; 
int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 /* check for this case before doing the work for nul-termination */ @@ -2205,7 +2195,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strcoll_libc(arg1, arg2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2241,7 +2231,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2259,13 +2249,10 @@ static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - if (locale) - return strxfrm_l(dest, src, destsize, locale->info.lt); - else - return strxfrm(dest, src, destsize); + return strxfrm_l(dest, src, destsize, locale->info.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -2282,7 +2269,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, size_t bufsize = srclen + 1; size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -2454,7 +2441,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, bool pg_strxfrm_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) #ifdef TRUST_STRXFRM return true; #else @@ -2488,7 +2475,7 @@ pg_strxfrm(char *dest, const char *src, size_t 
destsize, pg_locale_t locale) { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strxfrm_libc(dest, src, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2525,7 +2512,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2545,7 +2532,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, bool pg_strxfrm_prefix_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) return false; else if (locale->provider == COLLPROVIDER_ICU) return true; @@ -2575,13 +2562,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -2610,13 +2595,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -3166,7 +3149,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; - 
Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3223,7 +3206,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 02dfe219f5..60b1372227 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -1014,7 +1014,7 @@ hashbpchar(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) keydata, keylen); } @@ -1069,7 +1069,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) keydata, keylen, PG_GETARG_INT64(1)); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d2e2e9bbba..9abae63221 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1224,7 +1224,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!pg_locale_deterministic(mylocale)) + if (mylocale && !pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); @@ -1803,7 +1803,7 @@ text_starts_with(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!pg_locale_deterministic(mylocale)) + if (mylocale && 
!pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); -- 2.34.1
From 865a8c586d97eea6d496eab16edf35a842294ed2 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 15:22:04 -0700 Subject: [PATCH v1 5/5] Avoid setlocale() in lc_collate_is_c() and lc_ctype_is_c(). Store the collate and ctype strings in the pg_locale_t structure, which requires some minor refactoring. --- src/backend/regex/regc_pg_locale.c | 44 ++++++++++++++-------------- src/backend/utils/adt/formatting.c | 20 ++++++------- src/backend/utils/adt/like.c | 2 +- src/backend/utils/adt/like_support.c | 2 +- src/backend/utils/adt/pg_locale.c | 30 +++++++++++-------- src/include/utils/pg_locale.h | 7 ++++- 6 files changed, 57 insertions(+), 48 deletions(-) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 85f3238eb0..1e180b844f 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -309,11 +309,11 @@ pg_wc_isdigit(pg_wchar c) isdigit((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); + return iswdigit_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); + isdigit_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -343,11 +343,11 @@ pg_wc_isalpha(pg_wchar c) isalpha((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); + return iswalpha_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); + isalpha_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -377,11 +377,11 @@ 
pg_wc_isalnum(pg_wchar c) isalnum((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); + return iswalnum_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); + isalnum_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -420,11 +420,11 @@ pg_wc_isupper(pg_wchar c) isupper((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); + return iswupper_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); + isupper_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -454,11 +454,11 @@ pg_wc_islower(pg_wchar c) islower((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); + return iswlower_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); + islower_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -488,11 +488,11 @@ pg_wc_isgraph(pg_wchar c) isgraph((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); + return iswgraph_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, 
pg_regex_locale->info.lt)); + isgraph_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -522,11 +522,11 @@ pg_wc_isprint(pg_wchar c) isprint((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); + return iswprint_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); + isprint_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -556,11 +556,11 @@ pg_wc_ispunct(pg_wchar c) ispunct((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); + return iswpunct_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); + ispunct_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -590,11 +590,11 @@ pg_wc_isspace(pg_wchar c) isspace((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); + return iswspace_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); + isspace_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -632,11 +632,11 @@ pg_wc_toupper(pg_wchar c) return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); + return towupper_l((wint_t) c, 
pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); + return toupper_l((unsigned char) c, pg_regex_locale->info.libc.lt); return c; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -674,11 +674,11 @@ pg_wc_tolower(pg_wchar c) return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); + return towlower_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale->info.lt); + return tolower_l((unsigned char) c, pg_regex_locale->info.libc.lt); return c; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 8736ada4be..2c3a28ca25 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1732,7 +1732,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else workspace[curr_char] = towlower(workspace[curr_char]); } @@ -1763,7 +1763,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) for (p = result; *p; p++) { if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else *p = pg_tolower((unsigned char) *p); } @@ -1880,7 +1880,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towupper_l(workspace[curr_char], 
mylocale->info.libc.lt); else workspace[curr_char] = towupper(workspace[curr_char]); } @@ -1911,7 +1911,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) for (p = result; *p; p++) { if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); else *p = pg_toupper((unsigned char) *p); } @@ -2084,10 +2084,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.libc.lt); } else { @@ -2127,10 +2127,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.libc.lt); } else { diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 0807b89b17..4d9a207816 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) if (locale_is_c) return pg_ascii_tolower(c); else if (locale) - return tolower_l(c, locale->info.lt); + return tolower_l(c, locale->info.libc.lt); else return pg_tolower(c); } diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 
2635050861..6a1c64176d 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1509,7 +1509,7 @@ pattern_char_isalpha(char c, bool is_multibyte, return IS_HIGHBIT_SET(c) || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else if (locale && locale->provider == COLLPROVIDER_LIBC) - return isalpha_l((unsigned char) c, locale->info.lt); + return isalpha_l((unsigned char) c, locale->info.libc.lt); else return isalpha((unsigned char) c); } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 2ffc551913..060dcbcb14 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1350,9 +1350,7 @@ lc_collate_is_c(Oid collation) } else if (default_locale.provider == COLLPROVIDER_LIBC) { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); + localeptr = default_locale.info.libc.collate; } else elog(ERROR, "unexpected collation provider '%c'", @@ -1416,9 +1414,7 @@ lc_ctype_is_c(Oid collation) } else if (default_locale.provider == COLLPROVIDER_LIBC) { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); + localeptr = default_locale.info.libc.ctype; } else elog(ERROR, "unexpected collation provider '%c'", @@ -1629,7 +1625,11 @@ pg_init_database_collation() #endif } - default_locale.info.lt = loc; + default_locale.info.libc.collate = MemoryContextStrdup( + TopMemoryContext, datcollate); + default_locale.info.libc.ctype = MemoryContextStrdup( + TopMemoryContext, datctype); + default_locale.info.libc.lt = loc; } default_locale.provider = dbform->datlocprovider; @@ -1750,7 +1750,11 @@ pg_newlocale_from_collation(Oid collid) #endif } - result.info.lt = loc; + result.info.libc.collate = MemoryContextStrdup( + TopMemoryContext, collcollate); + result.info.libc.ctype = MemoryContextStrdup( + TopMemoryContext, collctype); + result.info.libc.lt = loc; } else if (collform->collprovider == 
COLLPROVIDER_ICU) { @@ -1990,7 +1994,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.libc.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -2027,7 +2031,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - result = strcoll_l(arg1, arg2, locale->info.lt); + result = strcoll_l(arg1, arg2, locale->info.libc.lt); return result; } @@ -2252,7 +2256,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize, Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - return strxfrm_l(dest, src, destsize, locale->info.lt); + return strxfrm_l(dest, src, destsize, locale->info.libc.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -3185,7 +3189,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) else { /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale->info.lt); + result = wcstombs_l(to, from, tolen, locale->info.libc.lt); } return result; @@ -3247,7 +3251,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, else { /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); + result = mbstowcs_l(to, str, tolen, locale->info.libc.lt); } pfree(str); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 1c81a84124..1130f7b86f 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -80,7 +80,12 @@ struct pg_locale_struct { const char *locale; } builtin; - locale_t lt; + struct + { + const char *collate; + const char *ctype; + locale_t lt; + } libc; #ifdef USE_ICU struct { -- 2.34.1