On Thu, 2024-06-06 at 11:37 -0700, Jeff Davis wrote: > > I think this patch series is a nice cleanup, as well, making libc more like the other providers and not dependent on global state.
New rebased series attached with additional cleanup. Now that pg_locale_t is never NULL, we can simplify the way the collation cache works, eliminating ~100 lines. -- Jeff Davis PostgreSQL Contributor Team - AWS
From d3862b88d8df3372ebdd368489a86142bd11f42c Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:45:55 -0700 Subject: [PATCH v2 1/7] Make database default collation internal to pg_locale.c. --- src/backend/utils/adt/pg_locale.c | 64 ++++++++++++++++++++++++++++++- src/backend/utils/init/postinit.c | 35 ++--------------- src/include/utils/pg_locale.h | 3 +- 3 files changed, 66 insertions(+), 36 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 7e5bb2b703..29f16c49cb 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -56,6 +56,7 @@ #include "access/htup_details.h" #include "catalog/pg_collation.h" +#include "catalog/pg_database.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" @@ -116,6 +117,8 @@ char *localized_full_months[12 + 1]; /* is the databases's LC_CTYPE the C locale? */ bool database_ctype_is_c = false; +static struct pg_locale_struct default_locale; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; @@ -1443,8 +1446,6 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } -struct pg_locale_struct default_locale; - void make_icu_collator(const char *iculocstr, const char *icurules, @@ -1537,6 +1538,65 @@ pg_locale_deterministic(pg_locale_t locale) return locale->deterministic; } +void +pg_init_database_collation() +{ + HeapTuple tup; + Form_pg_database dbform; + Datum datum; + bool isnull; + + /* Fetch our pg_database row normally, via syscache */ + tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", MyDatabaseId); + dbform = (Form_pg_database) GETSTRUCT(tup); + + if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) + { + char *datlocale; + + datum = 
SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + builtin_validate_locale(dbform->encoding, datlocale); + + default_locale.info.builtin.locale = MemoryContextStrdup( + TopMemoryContext, datlocale); + } + else if (dbform->datlocprovider == COLLPROVIDER_ICU) + { + char *datlocale; + char *icurules; + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datlocale = TextDatumGetCString(datum); + + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); + if (!isnull) + icurules = TextDatumGetCString(datum); + else + icurules = NULL; + + make_icu_collator(datlocale, icurules, &default_locale); + } + else + { + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + } + + default_locale.provider = dbform->datlocprovider; + + /* + * Default locale is currently always deterministic. Nondeterministic + * locales currently don't support pattern matching, which would break a + * lot of things if applied globally. + */ + default_locale.deterministic = true; + + ReleaseSysCache(tup); +} + /* * Create a locale_t from a collation OID. Results are cached for the * lifetime of the backend. Thus, do not free the result with freelocale(). 
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 0805398e24..6347efdd5a 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -423,43 +423,14 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect strcmp(ctype, "POSIX") == 0) database_ctype_is_c = true; - if (dbform->datlocprovider == COLLPROVIDER_BUILTIN) - { - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); - datlocale = TextDatumGetCString(datum); - - builtin_validate_locale(dbform->encoding, datlocale); - - default_locale.info.builtin.locale = MemoryContextStrdup( - TopMemoryContext, datlocale); - } - else if (dbform->datlocprovider == COLLPROVIDER_ICU) - { - char *icurules; + pg_init_database_collation(); - datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_datlocale, &isnull); + if (!isnull) datlocale = TextDatumGetCString(datum); - - datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); - if (!isnull) - icurules = TextDatumGetCString(datum); - else - icurules = NULL; - - make_icu_collator(datlocale, icurules, &default_locale); - } else datlocale = NULL; - default_locale.provider = dbform->datlocprovider; - - /* - * Default locale is currently always deterministic. Nondeterministic - * locales currently don't support pattern matching, which would break a - * lot of things if applied globally. - */ - default_locale.deterministic = true; - /* * Check collation version. See similar code in * pg_newlocale_from_collation(). 
Note that here we warn instead of error diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 040968d6ff..3a9026e7b7 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -93,13 +93,12 @@ struct pg_locale_struct typedef struct pg_locale_struct *pg_locale_t; -extern PGDLLIMPORT struct pg_locale_struct default_locale; - extern void make_icu_collator(const char *iculocstr, const char *icurules, struct pg_locale_struct *resultp); extern bool pg_locale_deterministic(pg_locale_t locale); +extern void pg_init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); -- 2.34.1
From 6bce19a5ce602f718eb29c6d8c24adc4b648c2ee Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 15:02:26 -0700 Subject: [PATCH v2 2/7] Make database collation pg_locale_t always non-NULL. Previously, the database collation's pg_locale_t was NULL for the libc provider. This commit properly initializes a pg_locale_t object in all cases. --- src/backend/utils/adt/pg_locale.c | 58 +++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 29f16c49cb..185b860dad 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1582,7 +1582,58 @@ pg_init_database_collation() } else { + const char *datcollate; + const char *datctype pg_attribute_unused(); + locale_t loc; + Assert(dbform->datlocprovider == COLLPROVIDER_LIBC); + + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datcollate); + datcollate = TextDatumGetCString(datum); + datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); + datctype = TextDatumGetCString(datum); + + if (strcmp(datcollate, datctype) == 0) + { + /* Normal case where they're the same */ + errno = 0; +#ifndef WIN32 + loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, datcollate, + NULL); +#else + loc = _create_locale(LC_ALL, datcollate); +#endif + if (!loc) + report_newlocale_failure(datcollate); + } + else + { +#ifndef WIN32 + /* We need two newlocale() steps */ + locale_t loc1; + + errno = 0; + loc1 = newlocale(LC_COLLATE_MASK, datcollate, NULL); + if (!loc1) + report_newlocale_failure(datcollate); + errno = 0; + loc = newlocale(LC_CTYPE_MASK, datctype, loc1); + if (!loc) + report_newlocale_failure(datctype); +#else + + /* + * XXX The _create_locale() API doesn't appear to support + * this. Could perhaps be worked around by changing + * pg_locale_t to contain two separate fields. 
+ */ + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("collations with different collate and ctype values are not supported on this platform"))); +#endif + } + + default_locale.info.lt = loc; } default_locale.provider = dbform->datlocprovider; @@ -1616,12 +1667,7 @@ pg_newlocale_from_collation(Oid collid) Assert(OidIsValid(collid)); if (collid == DEFAULT_COLLATION_OID) - { - if (default_locale.provider == COLLPROVIDER_LIBC) - return (pg_locale_t) 0; - else - return &default_locale; - } + return &default_locale; cache_entry = lookup_collation_cache(collid, false); -- 2.34.1
From 53ec3ac3d061a5397657c943569593e98150b3a4 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 14:48:07 -0700 Subject: [PATCH v2 3/7] ts_locale.c: do not use NULL to mean the database collation. Use pg_newlocale_from_collation(DEFAULT_COLLATION_OID) to explicitly get the database collation. --- src/backend/tsearch/ts_locale.c | 13 +++++++------ src/backend/tsearch/wparser_def.c | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index bc44599de6..86c3d6e790 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -13,6 +13,7 @@ */ #include "postgres.h" +#include "catalog/pg_collation.h" #include "common/string.h" #include "storage/fd.h" #include "tsearch/ts_locale.h" @@ -36,7 +37,7 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isdigit(TOUCHAR(ptr)); @@ -51,7 +52,7 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isspace(TOUCHAR(ptr)); @@ -66,7 +67,7 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalpha(TOUCHAR(ptr)); @@ -81,7 +82,7 @@ t_isalnum(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (clen == 1 || database_ctype_is_c) return isalnum(TOUCHAR(ptr)); @@ -96,7 +97,7 @@ 
t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID);; if (clen == 1 || database_ctype_is_c) return isprint(TOUCHAR(ptr)); @@ -266,7 +267,7 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); if (len == 0) return pstrdup(""); diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 3919ef27b5..a333d46802 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -17,6 +17,7 @@ #include <limits.h> #include <wctype.h> +#include "catalog/pg_collation.h" #include "commands/defrem.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -299,7 +300,7 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - pg_locale_t mylocale = 0; /* TODO */ + pg_locale_t mylocale = pg_newlocale_from_collation(DEFAULT_COLLATION_OID); prs->usewide = true; if (database_ctype_is_c) -- 2.34.1
From 8d267620c1b0551e81e6ecc6dea6d58cd05df0c2 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 11:58:59 -0700 Subject: [PATCH v2 4/7] Remove support for null pg_locale_t. Previously, passing NULL for pg_locale_t meant "use the libc provider and the server environment". Now that the database collation is represented as a proper pg_locale_t (not dependent on setlocale()), remove special cases for NULL. --- src/backend/access/hash/hashfunc.c | 4 +-- src/backend/utils/adt/like.c | 2 +- src/backend/utils/adt/pg_locale.c | 57 +++++++++++------------------- src/backend/utils/adt/varchar.c | 4 +-- src/backend/utils/adt/varlena.c | 4 +-- 5 files changed, 27 insertions(+), 44 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index ce8ee0ea2e..ec2133d4e1 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -280,7 +280,7 @@ hashtext(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); @@ -334,7 +334,7 @@ hashtextextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key), diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 57ead66b5a..0807b89b17 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -194,7 +194,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else locale = pg_newlocale_from_collation(collation); - if (!pg_locale_deterministic(locale)) + if (!locale_is_c && !pg_locale_deterministic(locale)) ereport(ERROR, 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for ILIKE"))); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 185b860dad..2ffc551913 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1531,11 +1531,7 @@ report_newlocale_failure(const char *localename) bool pg_locale_deterministic(pg_locale_t locale) { - /* default locale must always be deterministic */ - if (locale == NULL) - return true; - else - return locale->deterministic; + return locale->deterministic; } void @@ -1954,7 +1950,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, int r; int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); #ifndef WIN32 Assert(false); @@ -1994,10 +1990,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - if (locale) - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); - else - result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -2023,7 +2016,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 if (GetDatabaseEncoding() == PG_UTF8) { @@ -2034,10 +2027,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - if (locale) result = strcoll_l(arg1, arg2, locale->info.lt); - else - result = strcoll(arg1, arg2); return result; } @@ -2059,7 +2049,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2, char *arg2n; 
int result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef WIN32 /* check for this case before doing the work for nul-termination */ @@ -2205,7 +2195,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale) { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strcoll_libc(arg1, arg2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2241,7 +2231,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2, { int result; - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strncoll_libc(arg1, len1, arg2, len2, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2259,13 +2249,10 @@ static size_t pg_strxfrm_libc(char *dest, const char *src, size_t destsize, pg_locale_t locale) { - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - if (locale) - return strxfrm_l(dest, src, destsize, locale->info.lt); - else - return strxfrm(dest, src, destsize); + return strxfrm_l(dest, src, destsize, locale->info.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -2282,7 +2269,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize, size_t bufsize = srclen + 1; size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (bufsize > TEXTBUFLEN) buf = palloc(bufsize); @@ -2454,7 +2441,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen, bool pg_strxfrm_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) #ifdef TRUST_STRXFRM return true; #else @@ -2488,7 +2475,7 @@ pg_strxfrm(char *dest, const char *src, size_t 
destsize, pg_locale_t locale) { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strxfrm_libc(dest, src, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2525,7 +2512,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, { size_t result = 0; /* keep compiler quiet */ - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale); #ifdef USE_ICU else if (locale->provider == COLLPROVIDER_ICU) @@ -2545,7 +2532,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen, bool pg_strxfrm_prefix_enabled(pg_locale_t locale) { - if (!locale || locale->provider == COLLPROVIDER_LIBC) + if (locale->provider == COLLPROVIDER_LIBC) return false; else if (locale->provider == COLLPROVIDER_ICU) return true; @@ -2575,13 +2562,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -2610,13 +2595,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, { size_t result = 0; /* keep compiler quiet */ - if (!locale) - PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC); #ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) + if (locale->provider == COLLPROVIDER_ICU) result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale); -#endif else +#endif PGLOCALE_SUPPORT_ERROR(locale->provider); return result; @@ -3166,7 +3149,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) { size_t result; - 
Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; @@ -3223,7 +3206,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, { size_t result; - Assert(!locale || locale->provider == COLLPROVIDER_LIBC); + Assert(locale->provider == COLLPROVIDER_LIBC); if (tolen == 0) return 0; diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index 02dfe219f5..60b1372227 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -1014,7 +1014,7 @@ hashbpchar(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any((unsigned char *) keydata, keylen); } @@ -1069,7 +1069,7 @@ hashbpcharextended(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (pg_locale_deterministic(mylocale)) + if (!mylocale || pg_locale_deterministic(mylocale)) { result = hash_any_extended((unsigned char *) keydata, keylen, PG_GETARG_INT64(1)); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d2e2e9bbba..9abae63221 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1224,7 +1224,7 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!pg_locale_deterministic(mylocale)) + if (mylocale && !pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); @@ -1803,7 +1803,7 @@ text_starts_with(PG_FUNCTION_ARGS) if (!lc_collate_is_c(collid)) mylocale = pg_newlocale_from_collation(collid); - if (!pg_locale_deterministic(mylocale)) + if (mylocale && 
!pg_locale_deterministic(mylocale)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("nondeterministic collations are not supported for substring searches"))); -- 2.34.1
From 3b208a189ecc0d6942c357f8b616ad0aa9ee5848 Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Wed, 5 Jun 2024 15:22:04 -0700 Subject: [PATCH v2 5/7] Avoid setlocale() in lc_collate_is_c() and lc_ctype_is_c(). Store the collate and ctype strings in the pg_locale_t structure, which requires some minor refactoring. --- src/backend/regex/regc_pg_locale.c | 44 ++++++++++++++-------------- src/backend/utils/adt/formatting.c | 20 ++++++------- src/backend/utils/adt/like.c | 2 +- src/backend/utils/adt/like_support.c | 2 +- src/backend/utils/adt/pg_locale.c | 30 +++++++++++-------- src/include/utils/pg_locale.h | 7 ++++- 6 files changed, 57 insertions(+), 48 deletions(-) diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 85f3238eb0..1e180b844f 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -309,11 +309,11 @@ pg_wc_isdigit(pg_wchar c) isdigit((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); + return iswdigit_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); + isdigit_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -343,11 +343,11 @@ pg_wc_isalpha(pg_wchar c) isalpha((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); + return iswalpha_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); + isalpha_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -377,11 +377,11 @@ 
pg_wc_isalnum(pg_wchar c) isalnum((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); + return iswalnum_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); + isalnum_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -420,11 +420,11 @@ pg_wc_isupper(pg_wchar c) isupper((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); + return iswupper_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); + isupper_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -454,11 +454,11 @@ pg_wc_islower(pg_wchar c) islower((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); + return iswlower_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); + islower_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -488,11 +488,11 @@ pg_wc_isgraph(pg_wchar c) isgraph((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); + return iswgraph_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, 
pg_regex_locale->info.lt)); + isgraph_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -522,11 +522,11 @@ pg_wc_isprint(pg_wchar c) isprint((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); + return iswprint_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); + isprint_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -556,11 +556,11 @@ pg_wc_ispunct(pg_wchar c) ispunct((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); + return iswpunct_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); + ispunct_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -590,11 +590,11 @@ pg_wc_isspace(pg_wchar c) isspace((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); + return iswspace_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); + isspace_l((unsigned char) c, pg_regex_locale->info.libc.lt)); break; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -632,11 +632,11 @@ pg_wc_toupper(pg_wchar c) return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); + return towupper_l((wint_t) c, 
pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); + return toupper_l((unsigned char) c, pg_regex_locale->info.libc.lt); return c; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU @@ -674,11 +674,11 @@ pg_wc_tolower(pg_wchar c) return c; case PG_REGEX_LOCALE_WIDE_L: if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); + return towlower_l((wint_t) c, pg_regex_locale->info.libc.lt); /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale->info.lt); + return tolower_l((unsigned char) c, pg_regex_locale->info.libc.lt); return c; case PG_REGEX_LOCALE_ICU: #ifdef USE_ICU diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 8736ada4be..2c3a28ca25 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1732,7 +1732,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (mylocale) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else workspace[curr_char] = towlower(workspace[curr_char]); } @@ -1763,7 +1763,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) for (p = result; *p; p++) { if (mylocale) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else *p = pg_tolower((unsigned char) *p); } @@ -1880,7 +1880,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) for (curr_char = 0; workspace[curr_char] != 0; curr_char++) { if (mylocale) - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towupper_l(workspace[curr_char], 
mylocale->info.libc.lt); else workspace[curr_char] = towupper(workspace[curr_char]); } @@ -1911,7 +1911,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) for (p = result; *p; p++) { if (mylocale) - *p = toupper_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); else *p = pg_toupper((unsigned char) *p); } @@ -2084,10 +2084,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.libc.lt); else - workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt); - wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt); + workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.libc.lt); + wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.libc.lt); } else { @@ -2127,10 +2127,10 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) if (mylocale) { if (wasalnum) - *p = tolower_l((unsigned char) *p, mylocale->info.lt); + *p = tolower_l((unsigned char) *p, mylocale->info.libc.lt); else - *p = toupper_l((unsigned char) *p, mylocale->info.lt); - wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt); + *p = toupper_l((unsigned char) *p, mylocale->info.libc.lt); + wasalnum = isalnum_l((unsigned char) *p, mylocale->info.libc.lt); } else { diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 0807b89b17..4d9a207816 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -96,7 +96,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) if (locale_is_c) return pg_ascii_tolower(c); else if (locale) - return tolower_l(c, locale->info.lt); + return tolower_l(c, locale->info.libc.lt); else return pg_tolower(c); } diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 
2635050861..6a1c64176d 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1509,7 +1509,7 @@ pattern_char_isalpha(char c, bool is_multibyte, return IS_HIGHBIT_SET(c) || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else if (locale && locale->provider == COLLPROVIDER_LIBC) - return isalpha_l((unsigned char) c, locale->info.lt); + return isalpha_l((unsigned char) c, locale->info.libc.lt); else return isalpha((unsigned char) c); } diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 2ffc551913..060dcbcb14 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -1350,9 +1350,7 @@ lc_collate_is_c(Oid collation) } else if (default_locale.provider == COLLPROVIDER_LIBC) { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); + localeptr = default_locale.info.libc.collate; } else elog(ERROR, "unexpected collation provider '%c'", @@ -1416,9 +1414,7 @@ lc_ctype_is_c(Oid collation) } else if (default_locale.provider == COLLPROVIDER_LIBC) { - localeptr = setlocale(LC_CTYPE, NULL); - if (!localeptr) - elog(ERROR, "invalid LC_CTYPE setting"); + localeptr = default_locale.info.libc.ctype; } else elog(ERROR, "unexpected collation provider '%c'", @@ -1629,7 +1625,11 @@ pg_init_database_collation() #endif } - default_locale.info.lt = loc; + default_locale.info.libc.collate = MemoryContextStrdup( + TopMemoryContext, datcollate); + default_locale.info.libc.ctype = MemoryContextStrdup( + TopMemoryContext, datctype); + default_locale.info.libc.lt = loc; } default_locale.provider = dbform->datlocprovider; @@ -1750,7 +1750,11 @@ pg_newlocale_from_collation(Oid collid) #endif } - result.info.lt = loc; + result.info.libc.collate = MemoryContextStrdup( + TopMemoryContext, collcollate); + result.info.libc.ctype = MemoryContextStrdup( + TopMemoryContext, collctype); + result.info.libc.lt = loc; } else if (collform->collprovider == 
COLLPROVIDER_ICU) { @@ -1990,7 +1994,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2, ((LPWSTR) a2p)[r] = 0; errno = 0; - result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt); + result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.libc.lt); if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */ ereport(ERROR, (errmsg("could not compare Unicode strings: %m"))); @@ -2027,7 +2031,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale) } else #endif /* WIN32 */ - result = strcoll_l(arg1, arg2, locale->info.lt); + result = strcoll_l(arg1, arg2, locale->info.libc.lt); return result; } @@ -2252,7 +2256,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize, Assert(locale->provider == COLLPROVIDER_LIBC); #ifdef TRUST_STRXFRM - return strxfrm_l(dest, src, destsize, locale->info.lt); + return strxfrm_l(dest, src, destsize, locale->info.libc.lt); #else /* shouldn't happen */ PGLOCALE_SUPPORT_ERROR(locale->provider); @@ -3185,7 +3189,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale) else { /* Use wcstombs_l for nondefault locales */ - result = wcstombs_l(to, from, tolen, locale->info.lt); + result = wcstombs_l(to, from, tolen, locale->info.libc.lt); } return result; @@ -3247,7 +3251,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen, else { /* Use mbstowcs_l for nondefault locales */ - result = mbstowcs_l(to, str, tolen, locale->info.lt); + result = mbstowcs_l(to, str, tolen, locale->info.libc.lt); } pfree(str); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 3a9026e7b7..63b724dc1d 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -80,7 +80,12 @@ struct pg_locale_struct { const char *locale; } builtin; - locale_t lt; + struct + { + const char *collate; + const char *ctype; + locale_t lt; + } libc; #ifdef USE_ICU struct { -- 2.34.1
From f8532d5d287ff190917f4ac8bdc3de85ae697e8e Mon Sep 17 00:00:00 2001 From: Jeff Davis <j...@j-davis.com> Date: Fri, 14 Jun 2024 15:13:59 -0700 Subject: [PATCH v2 6/7] Simplify collation cache. Now that the result of pg_newlocale_from_collation() is always non-NULL, move the collate_is_c and ctype_is_c flags into pg_locale_t, and always use that. This commit eliminates the multi-stage initialization of the cache and the extra code in lc_collate_is_c() and lc_ctype_is_c(). --- src/backend/utils/adt/pg_locale.c | 160 +++++------------------------- src/include/utils/pg_locale.h | 2 + 2 files changed, 28 insertions(+), 134 deletions(-) diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index 060dcbcb14..435a37a0e3 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false; typedef struct { Oid collid; /* hash key: pg_collation OID */ - bool collate_is_c; /* is collation's LC_COLLATE C? */ - bool ctype_is_c; /* is collation's LC_CTYPE C? */ - bool flags_valid; /* true if above flags are valid */ pg_locale_t locale; /* locale_t struct, or 0 if not valid */ } collation_cache_entry; @@ -1230,7 +1227,7 @@ IsoLocaleName(const char *winlocname) */ static collation_cache_entry * -lookup_collation_cache(Oid collation, bool set_flags) +lookup_collation_cache(Oid collation) { collation_cache_entry *cache_entry; bool found; @@ -1256,59 +1253,9 @@ lookup_collation_cache(Oid collation, bool set_flags) * Make sure cache entry is marked invalid, in case we fail before * setting things.
*/ - cache_entry->flags_valid = false; cache_entry->locale = 0; } - if (set_flags && !cache_entry->flags_valid) - { - /* Attempt to set the flags */ - HeapTuple tp; - Form_pg_collation collform; - - tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for collation %u", collation); - collform = (Form_pg_collation) GETSTRUCT(tp); - - if (collform->collprovider == COLLPROVIDER_BUILTIN) - { - Datum datum; - const char *colllocale; - - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); - colllocale = TextDatumGetCString(datum); - - cache_entry->collate_is_c = true; - cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0); - } - else if (collform->collprovider == COLLPROVIDER_LIBC) - { - Datum datum; - const char *collcollate; - const char *collctype; - - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate); - collcollate = TextDatumGetCString(datum); - datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); - collctype = TextDatumGetCString(datum); - - cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) || - (strcmp(collcollate, "POSIX") == 0)); - cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) || - (strcmp(collctype, "POSIX") == 0)); - } - else - { - cache_entry->collate_is_c = false; - cache_entry->ctype_is_c = false; - } - - cache_entry->flags_valid = true; - - ReleaseSysCache(tp); - } - return cache_entry; } @@ -1326,45 +1273,6 @@ lc_collate_is_c(Oid collation) if (!OidIsValid(collation)) return false; - /* - * If we're asked about the default collation, we have to inquire of the C - * library. Cache the result so we only have to compute it once.
- */ - if (collation == DEFAULT_COLLATION_OID) - { - static int result = -1; - const char *localeptr; - - if (result >= 0) - return (bool) result; - - if (default_locale.provider == COLLPROVIDER_BUILTIN) - { - result = true; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_ICU) - { - result = false; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_LIBC) - { - localeptr = default_locale.info.libc.collate; - } - else - elog(ERROR, "unexpected collation provider '%c'", - default_locale.provider); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; - } - /* * If we're asked about the built-in C/POSIX collations, we know that. */ @@ -1375,7 +1283,7 @@ lc_collate_is_c(Oid collation) /* * Otherwise, we have to consult pg_collation, but we cache that. */ - return (lookup_collation_cache(collation, true))->collate_is_c; + return pg_newlocale_from_collation(collation)->collate_is_c; } /* @@ -1391,44 +1299,6 @@ lc_ctype_is_c(Oid collation) if (!OidIsValid(collation)) return false; - /* - * If we're asked about the default collation, we have to inquire of the C - * library. Cache the result so we only have to compute it once.
- */ - if (collation == DEFAULT_COLLATION_OID) - { - static int result = -1; - const char *localeptr; - - if (result >= 0) - return (bool) result; - - if (default_locale.provider == COLLPROVIDER_BUILTIN) - { - localeptr = default_locale.info.builtin.locale; - } - else if (default_locale.provider == COLLPROVIDER_ICU) - { - result = false; - return (bool) result; - } - else if (default_locale.provider == COLLPROVIDER_LIBC) - { - localeptr = default_locale.info.libc.ctype; - } - else - elog(ERROR, "unexpected collation provider '%c'", - default_locale.provider); - - if (strcmp(localeptr, "C") == 0) - result = true; - else if (strcmp(localeptr, "POSIX") == 0) - result = true; - else - result = false; - return (bool) result; - } - /* * If we're asked about the built-in C/POSIX collations, we know that. */ @@ -1439,7 +1309,7 @@ lc_ctype_is_c(Oid collation) /* * Otherwise, we have to consult pg_collation, but we cache that. */ - return (lookup_collation_cache(collation, true))->ctype_is_c; + return pg_newlocale_from_collation(collation)->ctype_is_c; } void @@ -1553,6 +1423,9 @@ pg_init_database_collation() builtin_validate_locale(dbform->encoding, datlocale); + default_locale.collate_is_c = true; + default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0); + default_locale.info.builtin.locale = MemoryContextStrdup( TopMemoryContext, datlocale); } @@ -1564,6 +1437,9 @@ pg_init_database_collation() datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale); datlocale = TextDatumGetCString(datum); + default_locale.collate_is_c = false; + default_locale.ctype_is_c = false; + datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); @@ -1585,6 +1461,11 @@ pg_init_database_collation() datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); datctype = TextDatumGetCString(datum); + default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) || + (strcmp(datcollate,
"POSIX") == 0); + default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) || + (strcmp(datctype, "POSIX") == 0); + if (strcmp(datcollate, datctype) == 0) { /* Normal case where they're the same */ @@ -1665,7 +1546,7 @@ pg_newlocale_from_collation(Oid collid) if (collid == DEFAULT_COLLATION_OID) return &default_locale; - cache_entry = lookup_collation_cache(collid, false); + cache_entry = lookup_collation_cache(collid); if (cache_entry->locale == 0) { @@ -1694,6 +1575,9 @@ pg_newlocale_from_collation(Oid collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); locstr = TextDatumGetCString(datum); + result.collate_is_c = true; + result.ctype_is_c = (strcmp(locstr, "C") == 0); + builtin_validate_locale(GetDatabaseEncoding(), locstr); result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext, @@ -1710,6 +1594,11 @@ pg_newlocale_from_collation(Oid collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype); collctype = TextDatumGetCString(datum); + result.collate_is_c = (strcmp(collcollate, "C") == 0) || + (strcmp(collcollate, "POSIX") == 0); + result.ctype_is_c = (strcmp(collctype, "C") == 0) || + (strcmp(collctype, "POSIX") == 0); + if (strcmp(collcollate, collctype) == 0) { /* Normal case where they're the same */ @@ -1764,6 +1653,9 @@ pg_newlocale_from_collation(Oid collid) datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale); iculocstr = TextDatumGetCString(datum); + result.collate_is_c = false; + result.ctype_is_c = false; + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); if (!isnull) icurules = TextDatumGetCString(datum); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 63b724dc1d..a177b09e09 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -74,6 +74,8 @@ struct pg_locale_struct { char provider; bool deterministic; + bool collate_is_c; + bool ctype_is_c; union { struct -- 2.34.1