On Mon, 2024-07-29 at 21:45 +0200, Peter Eisentraut wrote:
> I have also re-reviewed the patches and I agree they are good to go.
I found a couple of issues with the later patches:
* There was still some confusion about the default collation vs.
datcollate/datctype for callers of wchar2char() and char2wchar() (those
functions only work for libc). I introduced a new pg_locale_t structure
to represent datcollate/datctype regardless of datlocprovider to solve
this.
* Another loose end relying on setlocale(): in selfuncs.c, there's
still a call directly to strxfrm(), which depends on setlocale(). I
changed this to look up the collation and then use pg_strxfrm(). That
should improve histogram selectivity estimates because it uses the
correct provider, rather than relying on setlocale(), right?
New series attached.
Regards,
Jeff Davis
From 5b903c82f34f5da9cab58ecd0a2683454d6ac9ed Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 14:48:07 -0700
Subject: [PATCH v6 1/3] Make datcollate/datctype accessible as a pg_locale_t.
get_db_env_locale() returns a libc locale representing the LC_COLLATE
/ LC_CTYPE environment, which is the same as the database default
collation if and only if the datlocprovider is libc.
Update callers in ts_locale.c to use get_db_env_locale() instead of
NULL.
Discussion: https://postgr.es/m/[email protected]
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
src/backend/tsearch/ts_locale.c | 37 ++++++++++++++++---------
src/backend/tsearch/wparser_def.c | 6 +++--
src/backend/utils/adt/pg_locale.c | 45 ++++++++++++++++++++++++++++---
src/backend/utils/init/postinit.c | 5 +---
src/include/utils/pg_locale.h | 5 ++--
5 files changed, 74 insertions(+), 24 deletions(-)
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index bc44599de6a..6befd8e82d6 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -13,6 +13,7 @@
*/
#include "postgres.h"
+#include "catalog/pg_collation.h"
#include "common/string.h"
#include "storage/fd.h"
#include "tsearch/ts_locale.h"
@@ -36,9 +37,11 @@ t_isdigit(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || database_ctype_is_c)
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
+
+ if (clen == 1 || mylocale->ctype_is_c)
return isdigit(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -51,9 +54,11 @@ t_isspace(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || database_ctype_is_c)
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
+
+ if (clen == 1 || mylocale->ctype_is_c)
return isspace(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -66,9 +71,11 @@ t_isalpha(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || database_ctype_is_c)
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
+
+ if (clen == 1 || mylocale->ctype_is_c)
return isalpha(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -81,9 +88,11 @@ t_isalnum(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || database_ctype_is_c)
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
+
+ if (clen == 1 || mylocale->ctype_is_c)
return isalnum(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -96,9 +105,11 @@ t_isprint(const char *ptr)
{
int clen = pg_mblen(ptr);
wchar_t character[WC_BUF_LEN];
- pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || database_ctype_is_c)
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
+
+ if (clen == 1 || mylocale->ctype_is_c)
return isprint(TOUCHAR(ptr));
char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale);
@@ -266,7 +277,9 @@ char *
lowerstr_with_len(const char *str, int len)
{
char *out;
- pg_locale_t mylocale = 0; /* TODO */
+
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
if (len == 0)
return pstrdup("");
@@ -277,7 +290,7 @@ lowerstr_with_len(const char *str, int len)
* Also, for a C locale there is no need to process as multibyte. From
* backend/utils/adt/oracle_compat.c Teodor
*/
- if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c)
+ if (pg_database_encoding_max_length() > 1 && !mylocale->ctype_is_c)
{
wchar_t *wstr,
*wptr;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 3919ef27b57..45caec0c4f0 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -17,6 +17,7 @@
#include <limits.h>
#include <wctype.h>
+#include "catalog/pg_collation.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -299,10 +300,11 @@ TParserInit(char *str, int len)
*/
if (prs->charmaxlen > 1)
{
- pg_locale_t mylocale = 0; /* TODO */
+ /* TODO: determine collation properly */
+ pg_locale_t mylocale = get_db_env_locale();
prs->usewide = true;
- if (database_ctype_is_c)
+ if (mylocale->ctype_is_c)
{
/*
* char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 627ab89d7cc..0295d834cc5 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -114,10 +114,8 @@ char *localized_full_days[7 + 1];
char *localized_abbrev_months[12 + 1];
char *localized_full_months[12 + 1];
-/* is the databases's LC_CTYPE the C locale? */
-bool database_ctype_is_c = false;
-
static struct pg_locale_struct default_locale;
+static struct pg_locale_struct database_env_locale;
/* indicates whether locale information cache is valid */
static bool CurrentLocaleConvValid = false;
@@ -1471,6 +1469,42 @@ pg_locale_deterministic(pg_locale_t locale)
return locale->deterministic;
}
+/*
+ * Initialize the database environment locale and store in a pg_locale_t.
+ */
+void
+init_db_env_locale(const char *datcollate, const char *datctype)
+{
+ Assert(database_env_locale.provider == (char) 0);
+
+ database_env_locale.provider = COLLPROVIDER_LIBC;
+ database_env_locale.deterministic = true;
+ database_env_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
+ (strcmp(datcollate, "POSIX") == 0);
+ database_env_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
+ (strcmp(datctype, "POSIX") == 0);
+
+ make_libc_collator(datcollate, datctype, &database_env_locale);
+}
+
+/*
+ * Return pg_locale_t representing the database environment locale.
+ *
+ * The provider is always libc, and it represents the server environment
+ * LC_COLLATE and LC_CTYPE.
+ *
+ * Most callers should use pg_newlocale_from_collation(DEFAULT_COLLATION_OID)
+ * instead to get a pg_locale_t representing the database default collation
+ * (which might be any provider). Use get_db_env_locale() only if the libc
+ * provider is needed, such as with wchar2char()/char2wchar().
+ */
+pg_locale_t
+get_db_env_locale(void)
+{
+ Assert(database_env_locale.provider != (char) 0);
+ return &database_env_locale;
+}
+
/*
* Initialize default_locale with database locale settings.
*/
@@ -1482,6 +1516,8 @@ init_database_collation(void)
Datum datum;
bool isnull;
+ Assert(default_locale.provider == (char) 0);
+
/* Fetch our pg_database row normally, via syscache */
tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
if (!HeapTupleIsValid(tup))
@@ -1571,7 +1607,10 @@ pg_newlocale_from_collation(Oid collid)
Assert(OidIsValid(collid));
if (collid == DEFAULT_COLLATION_OID)
+ {
+ Assert(default_locale.provider != (char) 0);
return &default_locale;
+ }
cache_entry = lookup_collation_cache(collid);
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 13524ea488a..23ac403e390 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -418,10 +418,7 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
" which is not recognized by setlocale().", ctype),
errhint("Recreate the database with another locale or install the missing locale.")));
- if (strcmp(ctype, "C") == 0 ||
- strcmp(ctype, "POSIX") == 0)
- database_ctype_is_c = true;
-
+ init_db_env_locale(collate, ctype);
init_database_collation();
/*
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index f41d33975be..47b2942c9d8 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -48,9 +48,6 @@ extern PGDLLIMPORT char *localized_full_days[];
extern PGDLLIMPORT char *localized_abbrev_months[];
extern PGDLLIMPORT char *localized_full_months[];
-/* is the databases's LC_CTYPE the C locale? */
-extern PGDLLIMPORT bool database_ctype_is_c;
-
extern bool check_locale(int category, const char *locale, char **canonname);
extern char *pg_perm_setlocale(int category, const char *locale);
@@ -112,6 +109,8 @@ extern void make_icu_collator(const char *iculocstr,
struct pg_locale_struct *resultp);
extern bool pg_locale_deterministic(pg_locale_t locale);
+extern void init_db_env_locale(const char *datcollate, const char *datctype);
+extern pg_locale_t get_db_env_locale(void);
extern void init_database_collation(void);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
--
2.34.1
From 1eea055318b07155fe025d9f6cf56dadcea040a0 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 5 Jun 2024 11:58:59 -0700
Subject: [PATCH v6 2/3] Remove support for null pg_locale_t.
Previously, passing NULL for pg_locale_t meant "use the libc provider
and the server environment". Now that the database collation is
represented as a proper pg_locale_t (not dependent on setlocale()),
remove special cases for NULL.
Discussion: https://postgr.es/m/[email protected]
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
src/backend/access/hash/hashfunc.c | 10 +--
src/backend/regex/regc_pg_locale.c | 113 +----------------------------
src/backend/utils/adt/formatting.c | 84 ++++++---------------
src/backend/utils/adt/like.c | 10 +--
src/backend/utils/adt/pg_locale.c | 78 ++++++--------------
src/backend/utils/adt/varchar.c | 10 +--
src/backend/utils/adt/varlena.c | 28 +++----
7 files changed, 69 insertions(+), 264 deletions(-)
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index ce8ee0ea2ef..d151751e185 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -268,7 +268,7 @@ hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -277,8 +277,7 @@ hashtext(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
@@ -322,7 +321,7 @@ hashtextextended(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_PP(0);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -331,8 +330,7 @@ hashtextextended(PG_FUNCTION_ARGS)
errmsg("could not determine which collation to use for string hashing"),
errhint("Use the COLLATE clause to set the collation explicitly.")));
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 9d98d10a285..947d73f3e0f 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -67,8 +67,6 @@ typedef enum
{
PG_REGEX_LOCALE_C, /* C locale (encoding independent) */
PG_REGEX_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_LOCALE_WIDE, /* Use <wctype.h> functions */
- PG_REGEX_LOCALE_1BYTE, /* Use <ctype.h> functions */
PG_REGEX_LOCALE_WIDE_L, /* Use locale_t <wctype.h> functions */
PG_REGEX_LOCALE_1BYTE_L, /* Use locale_t <ctype.h> functions */
PG_REGEX_LOCALE_ICU, /* Use ICU uchar.h functions */
@@ -261,13 +259,13 @@ pg_set_regex_collation(Oid collation)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("nondeterministic collations are not supported for regular expressions")));
- if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
+ if (pg_regex_locale->provider == COLLPROVIDER_BUILTIN)
{
Assert(GetDatabaseEncoding() == PG_UTF8);
pg_regex_strategy = PG_REGEX_BUILTIN;
}
#ifdef USE_ICU
- else if (pg_regex_locale && pg_regex_locale->provider == COLLPROVIDER_ICU)
+ else if (pg_regex_locale->provider == COLLPROVIDER_ICU)
{
pg_regex_strategy = PG_REGEX_LOCALE_ICU;
}
@@ -275,19 +273,9 @@ pg_set_regex_collation(Oid collation)
else
{
if (GetDatabaseEncoding() == PG_UTF8)
- {
- if (pg_regex_locale)
- pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
- else
- pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
- }
+ pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
else
- {
- if (pg_regex_locale)
- pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
- else
- pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
- }
+ pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
}
pg_regex_collation = collation;
@@ -304,13 +292,6 @@ pg_wc_isdigit(pg_wchar c)
(pg_char_properties[c] & PG_ISDIGIT));
case PG_REGEX_BUILTIN:
return pg_u_isdigit(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
@@ -338,13 +319,6 @@ pg_wc_isalpha(pg_wchar c)
(pg_char_properties[c] & PG_ISALPHA));
case PG_REGEX_BUILTIN:
return pg_u_isalpha(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
@@ -372,13 +346,6 @@ pg_wc_isalnum(pg_wchar c)
(pg_char_properties[c] & PG_ISALNUM));
case PG_REGEX_BUILTIN:
return pg_u_isalnum(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
@@ -415,13 +382,6 @@ pg_wc_isupper(pg_wchar c)
(pg_char_properties[c] & PG_ISUPPER));
case PG_REGEX_BUILTIN:
return pg_u_isupper(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -449,13 +409,6 @@ pg_wc_islower(pg_wchar c)
(pg_char_properties[c] & PG_ISLOWER));
case PG_REGEX_BUILTIN:
return pg_u_islower(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -483,13 +436,6 @@ pg_wc_isgraph(pg_wchar c)
(pg_char_properties[c] & PG_ISGRAPH));
case PG_REGEX_BUILTIN:
return pg_u_isgraph(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
@@ -517,13 +463,6 @@ pg_wc_isprint(pg_wchar c)
(pg_char_properties[c] & PG_ISPRINT));
case PG_REGEX_BUILTIN:
return pg_u_isprint(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
@@ -551,13 +490,6 @@ pg_wc_ispunct(pg_wchar c)
(pg_char_properties[c] & PG_ISPUNCT));
case PG_REGEX_BUILTIN:
return pg_u_ispunct(c, true);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
@@ -585,13 +517,6 @@ pg_wc_isspace(pg_wchar c)
(pg_char_properties[c] & PG_ISSPACE));
case PG_REGEX_BUILTIN:
return pg_u_isspace(c);
- case PG_REGEX_LOCALE_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
@@ -620,20 +545,6 @@ pg_wc_toupper(pg_wchar c)
return c;
case PG_REGEX_BUILTIN:
return unicode_uppercase_simple(c);
- case PG_REGEX_LOCALE_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper((unsigned char) c);
- return c;
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towupper_l((wint_t) c, pg_regex_locale->info.lt);
@@ -662,20 +573,6 @@ pg_wc_tolower(pg_wchar c)
return c;
case PG_REGEX_BUILTIN:
return unicode_lowercase_simple(c);
- case PG_REGEX_LOCALE_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower((wint_t) c);
- /* FALL THRU */
- case PG_REGEX_LOCALE_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower((unsigned char) c);
- return c;
case PG_REGEX_LOCALE_WIDE_L:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towlower_l((wint_t) c, pg_regex_locale->info.lt);
@@ -829,11 +726,9 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
case PG_REGEX_BUILTIN:
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
break;
- case PG_REGEX_LOCALE_WIDE:
case PG_REGEX_LOCALE_WIDE_L:
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
break;
- case PG_REGEX_LOCALE_1BYTE:
case PG_REGEX_LOCALE_1BYTE_L:
#if MAX_SIMPLE_CHR >= UCHAR_MAX
max_chr = (pg_wchar) UCHAR_MAX;
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 8736ada4be2..68069fcfd3b 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1665,7 +1665,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar;
int32_t len_conv;
@@ -1681,7 +1681,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -1710,7 +1710,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -1730,12 +1730,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
- if (mylocale)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towlower(workspace[curr_char]);
- }
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
@@ -1761,12 +1756,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
- {
- if (mylocale)
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = pg_tolower((unsigned char) *p);
- }
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
}
}
}
@@ -1813,7 +1803,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
@@ -1829,7 +1819,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -1858,7 +1848,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -1878,12 +1868,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- {
- if (mylocale)
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- }
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
@@ -1909,12 +1894,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
- {
- if (mylocale)
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = pg_toupper((unsigned char) *p);
- }
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
}
}
}
@@ -2003,7 +1983,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
mylocale = pg_newlocale_from_collation(collid);
#ifdef USE_ICU
- if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
@@ -2019,7 +1999,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
else
#endif
- if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN)
+ if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@@ -2060,7 +2040,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
}
else
{
- Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+ Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
@@ -2081,22 +2061,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
- if (mylocale)
- {
- if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
- else
- workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
- wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
- }
+ if (wasalnum)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
- {
- if (wasalnum)
- workspace[curr_char] = towlower(workspace[curr_char]);
- else
- workspace[curr_char] = towupper(workspace[curr_char]);
- wasalnum = iswalnum(workspace[curr_char]);
- }
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
/*
@@ -2124,22 +2093,11 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
*/
for (p = result; *p; p++)
{
- if (mylocale)
- {
- if (wasalnum)
- *p = tolower_l((unsigned char) *p, mylocale->info.lt);
- else
- *p = toupper_l((unsigned char) *p, mylocale->info.lt);
- wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
- }
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
- {
- if (wasalnum)
- *p = pg_tolower((unsigned char) *p);
- else
- *p = pg_toupper((unsigned char) *p);
- wasalnum = isalnum((unsigned char) *p);
- }
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
}
}
}
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 57ead66b5aa..0ecc96d48e5 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -174,8 +174,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
*p;
int slen,
plen;
- pg_locale_t locale = 0;
- bool locale_is_c = false;
+ pg_locale_t locale;
if (!OidIsValid(collation))
{
@@ -189,10 +188,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
errhint("Use the COLLATE clause to set the collation explicitly.")));
}
- if (lc_ctype_is_c(collation))
- locale_is_c = true;
- else
- locale = pg_newlocale_from_collation(collation);
+ locale = pg_newlocale_from_collation(collation);
if (!pg_locale_deterministic(locale))
ereport(ERROR,
@@ -228,7 +224,7 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
+ return SB_IMatchText(s, slen, p, plen, locale, locale->ctype_is_c);
}
}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 0295d834cc5..ccd6180a743 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1169,7 +1169,8 @@ get_iso_localename(const char *winlocname)
char *hyphen;
/* Locale names use only ASCII, any conversion locale suffices. */
- rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
+ rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages),
+ get_db_env_locale());
if (rc == -1 || rc == sizeof(iso_lc_messages))
return NULL;
@@ -1462,11 +1463,7 @@ make_icu_collator(const char *iculocstr,
bool
pg_locale_deterministic(pg_locale_t locale)
{
- /* default locale must always be deterministic */
- if (locale == NULL)
- return true;
- else
- return locale->deterministic;
+ return locale->deterministic;
}
/*
@@ -1867,7 +1864,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
int r;
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
Assert(GetDatabaseEncoding() == PG_UTF8);
#ifndef WIN32
Assert(false);
@@ -1907,10 +1904,7 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
((LPWSTR) a2p)[r] = 0;
errno = 0;
- if (locale)
- result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
- else
- result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+ result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
@@ -1936,7 +1930,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
{
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef WIN32
if (GetDatabaseEncoding() == PG_UTF8)
{
@@ -1947,10 +1941,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale)
result = strcoll_l(arg1, arg2, locale->info.lt);
- else
- result = strcoll(arg1, arg2);
return result;
}
@@ -1972,7 +1963,7 @@ pg_strncoll_libc(const char *arg1, size_t len1, const char *arg2, size_t len2,
char *arg2n;
int result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef WIN32
/* check for this case before doing the work for nul-termination */
@@ -2118,7 +2109,7 @@ pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale)
{
int result;
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strcoll_libc(arg1, arg2, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2154,7 +2145,7 @@ pg_strncoll(const char *arg1, size_t len1, const char *arg2, size_t len2,
{
int result;
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strncoll_libc(arg1, len1, arg2, len2, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2172,13 +2163,10 @@ static size_t
pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
pg_locale_t locale)
{
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
#ifdef TRUST_STRXFRM
- if (locale)
- return strxfrm_l(dest, src, destsize, locale->info.lt);
- else
- return strxfrm(dest, src, destsize);
+ return strxfrm_l(dest, src, destsize, locale->info.lt);
#else
/* shouldn't happen */
PGLOCALE_SUPPORT_ERROR(locale->provider);
@@ -2195,7 +2183,7 @@ pg_strnxfrm_libc(char *dest, const char *src, size_t srclen, size_t destsize,
size_t bufsize = srclen + 1;
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (bufsize > TEXTBUFLEN)
buf = palloc(bufsize);
@@ -2367,7 +2355,7 @@ pg_strnxfrm_prefix_icu(char *dest, const char *src, int32_t srclen,
bool
pg_strxfrm_enabled(pg_locale_t locale)
{
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
#ifdef TRUST_STRXFRM
return true;
#else
@@ -2401,7 +2389,7 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
{
size_t result = 0; /* keep compiler quiet */
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strxfrm_libc(dest, src, destsize, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2438,7 +2426,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
result = pg_strnxfrm_libc(dest, src, srclen, destsize, locale);
#ifdef USE_ICU
else if (locale->provider == COLLPROVIDER_ICU)
@@ -2458,7 +2446,7 @@ pg_strnxfrm(char *dest, size_t destsize, const char *src, size_t srclen,
bool
pg_strxfrm_prefix_enabled(pg_locale_t locale)
{
- if (!locale || locale->provider == COLLPROVIDER_LIBC)
+ if (locale->provider == COLLPROVIDER_LIBC)
return false;
else if (locale->provider == COLLPROVIDER_ICU)
return true;
@@ -2488,13 +2476,11 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale)
- PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
+ if (locale->provider == COLLPROVIDER_ICU)
result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
else
+#endif
PGLOCALE_SUPPORT_ERROR(locale->provider);
return result;
@@ -2523,13 +2509,11 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
{
size_t result = 0; /* keep compiler quiet */
- if (!locale)
- PGLOCALE_SUPPORT_ERROR(COLLPROVIDER_LIBC);
#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
+ if (locale->provider == COLLPROVIDER_ICU)
result = pg_strnxfrm_prefix_icu(dest, src, -1, destsize, locale);
-#endif
else
+#endif
PGLOCALE_SUPPORT_ERROR(locale->provider);
return result;
@@ -3086,7 +3070,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
{
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (tolen == 0)
return 0;
@@ -3114,12 +3098,6 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
#endif /* WIN32 */
- if (locale == (pg_locale_t) 0)
- {
- /* Use wcstombs directly for the default locale */
- result = wcstombs(to, from, tolen);
- }
- else
{
/* Use wcstombs_l for nondefault locales */
result = wcstombs_l(to, from, tolen, locale->info.lt);
@@ -3143,7 +3121,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
{
size_t result;
- Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+ Assert(locale->provider == COLLPROVIDER_LIBC);
if (tolen == 0)
return 0;
@@ -3176,16 +3154,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
/* mbstowcs requires ending '\0' */
char *str = pnstrdup(from, fromlen);
- if (locale == (pg_locale_t) 0)
- {
- /* Use mbstowcs directly for the default locale */
- result = mbstowcs(to, str, tolen);
- }
- else
- {
- /* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale->info.lt);
- }
+ /* 'locale' is never NULL here, so always use mbstowcs_l */
+ result = mbstowcs_l(to, str, tolen, locale->info.lt);
pfree(str);
}
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 02dfe219f54..829375cd1a3 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -999,7 +999,7 @@ hashbpchar(PG_FUNCTION_ARGS)
Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -1011,8 +1011,7 @@ hashbpchar(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
@@ -1054,7 +1053,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
Oid collid = PG_GET_COLLATION();
char *keydata;
int keylen;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
Datum result;
if (!collid)
@@ -1066,8 +1065,7 @@ hashbpcharextended(PG_FUNCTION_ARGS)
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (pg_locale_deterministic(mylocale))
{
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index d2e2e9bbba0..52ab8c43c66 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1217,12 +1217,11 @@ text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
{
int len1 = VARSIZE_ANY_EXHDR(t1);
int len2 = VARSIZE_ANY_EXHDR(t2);
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
check_collation_set(collid);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (!pg_locale_deterministic(mylocale))
ereport(ERROR,
@@ -1619,18 +1618,14 @@ Datum
texteq(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
- bool locale_is_c = false;
pg_locale_t mylocale = 0;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid))
- locale_is_c = true;
- else
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
- if (locale_is_c || pg_locale_deterministic(mylocale))
+ if (pg_locale_deterministic(mylocale))
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1678,18 +1673,14 @@ Datum
textne(PG_FUNCTION_ARGS)
{
Oid collid = PG_GET_COLLATION();
- bool locale_is_c = false;
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
bool result;
check_collation_set(collid);
- if (lc_collate_is_c(collid))
- locale_is_c = true;
- else
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
- if (locale_is_c || pg_locale_deterministic(mylocale))
+ if (pg_locale_deterministic(mylocale))
{
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
@@ -1793,15 +1784,14 @@ text_starts_with(PG_FUNCTION_ARGS)
Datum arg1 = PG_GETARG_DATUM(0);
Datum arg2 = PG_GETARG_DATUM(1);
Oid collid = PG_GET_COLLATION();
- pg_locale_t mylocale = 0;
+ pg_locale_t mylocale;
bool result;
Size len1,
len2;
check_collation_set(collid);
- if (!lc_collate_is_c(collid))
- mylocale = pg_newlocale_from_collation(collid);
+ mylocale = pg_newlocale_from_collation(collid);
if (!pg_locale_deterministic(mylocale))
ereport(ERROR,
--
2.34.1
From 9bd779fb711c902b33cfd3a5350e0736d7ceb138 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 29 Jul 2024 23:58:29 -0700
Subject: [PATCH v6 3/3] selfuncs.c: use pg_strxfrm() instead of strxfrm().
pg_strxfrm() takes a pg_locale_t, so it works properly with other
providers and does not rely on setlocale().
Discussion: https://postgr.es/m/[email protected]
Reviewed-by: Peter Eisentraut, Andreas Karlsson
---
src/backend/utils/adt/pg_locale.c | 23 ++++++++++++++++-------
src/backend/utils/adt/selfuncs.c | 9 +++++++--
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index ccd6180a743..17c55c5ab17 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -2164,14 +2164,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
pg_locale_t locale)
{
Assert(locale->provider == COLLPROVIDER_LIBC);
-
-#ifdef TRUST_STRXFRM
return strxfrm_l(dest, src, destsize, locale->info.lt);
-#else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
- return 0; /* keep compiler quiet */
-#endif
}
static size_t
@@ -2380,6 +2373,10 @@ pg_strxfrm_enabled(pg_locale_t locale)
* The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
* may be NULL.
*
+ * Not all providers support pg_strxfrm() safely. The caller should check
+ * pg_strxfrm_enabled() first; otherwise this function may return wrong
+ * results or raise an error.
+ *
* Returns the number of bytes needed to store the transformed string,
* excluding the terminating nul byte. If the value returned is 'destsize' or
* greater, the resulting contents of 'dest' are undefined.
@@ -2412,6 +2409,10 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
* 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
* be NULL.
*
+ * Not all providers support pg_strnxfrm() safely. The caller should check
+ * pg_strxfrm_enabled() first; otherwise this function may return wrong
+ * results or raise an error.
+ *
* Returns the number of bytes needed to store the transformed string,
* excluding the terminating nul byte. If the value returned is 'destsize' or
* greater, the resulting contents of 'dest' are undefined.
@@ -2466,6 +2467,10 @@ pg_strxfrm_prefix_enabled(pg_locale_t locale)
*
* The provided 'src' must be nul-terminated.
*
+ * Not all providers support pg_strxfrm_prefix() safely. The caller should
+ * check pg_strxfrm_prefix_enabled() first; otherwise this function may return
+ * wrong results or raise an error.
+ *
* If destsize is not large enough to hold the resulting byte sequence, stores
* only the first destsize bytes in 'dest'. Returns the number of bytes
* actually copied to 'dest'.
@@ -2495,6 +2500,10 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
*
* The provided 'src' must be nul-terminated.
*
+ * Not all providers support pg_strnxfrm_prefix() safely. The caller should
+ * check pg_strxfrm_prefix_enabled() first; otherwise this function may return
+ * wrong results or raise an error.
+ *
* If destsize is not large enough to hold the resulting byte sequence, stores
* only the first destsize bytes in 'dest'. Returns the number of bytes
* actually copied to 'dest'.
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 877a62a62ec..673cfd9e703 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -4673,6 +4673,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
if (!lc_collate_is_c(collid))
{
+ pg_locale_t mylocale = pg_newlocale_from_collation(collid);
char *xfrmstr;
size_t xfrmlen;
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
@@ -4685,8 +4686,12 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
* bogus data or set an error. This is not really a problem unless it
* crashes since it will only give an estimation error and nothing
* fatal.
+ *
+ * XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
+ * some cases, libc strxfrm() may return the wrong results, but that
+ * will only lead to an estimation error.
*/
- xfrmlen = strxfrm(NULL, val, 0);
+ xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
#ifdef WIN32
/*
@@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
return val;
#endif
xfrmstr = (char *) palloc(xfrmlen + 1);
- xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+ xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
/*
* Some systems (e.g., glibc) can return a smaller value from the
--
2.34.1