On Mon, 2024-12-02 at 23:58 -0800, Jeff Davis wrote:
> On Mon, 2024-12-02 at 16:39 +0100, Andreas Karlsson wrote:
> > I feel your first patch in the series is something you can just
> > commit.
>
> Done.
>
> I combined your patches and mine into the attached v10 series.
Here's v12 after committing a few of the earlier patches.
I changed the ctype method table to have separate methods for isdigit,
isalpha, etc., instead of the combined char_properties method. That's
more consistent with how things are currently done.
I may still be seeing a tiny perf regression using the same test as
[1], but I don't expect it to have a practical impact. Let me know if
you think that's a problem.
I committed your change to move the version reporting into the
provider-specific files.
Your other change to lookup_collation() in namespace.c should also
account for the code in DefineCollation() -- I don't think it makes
sense to refactor one without the other.
Regards,
Jeff Davis
[1]
https://www.postgresql.org/message-id/78a1b434ff40510dc5aaabe986299a09f4da90cf.camel%40j-davis.com
From 129b35a2ecc7243def519e50525b0476220e17e6 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Fri, 29 Nov 2024 09:37:43 -0800
Subject: [PATCH v12 1/4] Control ctype behavior internally with a method table.
Previously, pattern matching and case mapping behavior branched based
on the provider.
Refactor to use a method table, which is less error-prone and easier
to hook.
---
src/backend/regex/regc_pg_locale.c | 377 +++++-----------------
src/backend/utils/adt/like.c | 22 +-
src/backend/utils/adt/like_support.c | 7 +-
src/backend/utils/adt/pg_locale.c | 101 +++---
src/backend/utils/adt/pg_locale_builtin.c | 106 +++++-
src/backend/utils/adt/pg_locale_icu.c | 109 ++++++-
src/backend/utils/adt/pg_locale_libc.c | 279 +++++++++++++---
src/include/utils/pg_locale.h | 49 +++
src/tools/pgindent/typedefs.list | 1 -
9 files changed, 618 insertions(+), 433 deletions(-)
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 2360d08efae..31b8f4a9478 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -63,18 +63,13 @@
* NB: the coding here assumes pg_wchar is an unsigned type.
*/
-typedef enum
-{
- PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */
- PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */
- PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */
- PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
static pg_locale_t pg_regex_locale;
+static struct pg_locale_struct dummy_c_locale = {
+ .collate_is_c = true,
+ .ctype_is_c = true,
+};
+
/*
* Hard-wired character properties for C locale
*/
@@ -231,7 +226,6 @@ void
pg_set_regex_collation(Oid collation)
{
pg_locale_t locale = 0;
- PG_Locale_Strategy strategy;
if (!OidIsValid(collation))
{
@@ -252,8 +246,7 @@ pg_set_regex_collation(Oid collation)
* catalog access is available, so we can't call
* pg_newlocale_from_collation().
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
+ locale = &dummy_c_locale;
}
else
{
@@ -270,113 +263,41 @@ pg_set_regex_collation(Oid collation)
* C/POSIX collations use this path regardless of database
* encoding
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
- }
- else if (locale->provider == COLLPROVIDER_BUILTIN)
- {
- Assert(GetDatabaseEncoding() == PG_UTF8);
- strategy = PG_REGEX_STRATEGY_BUILTIN;
- }
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- {
- strategy = PG_REGEX_STRATEGY_ICU;
- }
-#endif
- else
- {
- Assert(locale->provider == COLLPROVIDER_LIBC);
- if (GetDatabaseEncoding() == PG_UTF8)
- strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
- else
- strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+ locale = &dummy_c_locale;
}
}
- pg_regex_strategy = strategy;
pg_regex_locale = locale;
}
static int
pg_wc_isdigit(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISDIGIT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isdigit(c, true);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isdigit(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISDIGIT));
+ else
+ return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
}
static int
pg_wc_isalpha(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALPHA));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalpha(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalpha(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALPHA));
+ else
+ return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
}
static int
pg_wc_isalnum(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALNUM));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalnum(c, true);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalnum(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALNUM));
+ else
+ return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
}
static int
@@ -391,219 +312,87 @@ pg_wc_isword(pg_wchar c)
static int
pg_wc_isupper(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISUPPER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isupper(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isupper(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISUPPER));
+ else
+ return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
}
static int
pg_wc_islower(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISLOWER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_islower(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_islower(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISLOWER));
+ else
+ return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
}
static int
pg_wc_isgraph(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISGRAPH));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isgraph(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isgraph(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISGRAPH));
+ else
+ return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
}
static int
pg_wc_isprint(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPRINT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isprint(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isprint(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPRINT));
+ else
+ return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
}
static int
pg_wc_ispunct(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPUNCT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_ispunct(c, true);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_ispunct(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPUNCT));
+ else
+ return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
}
static int
pg_wc_isspace(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISSPACE));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isspace(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isspace(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISSPACE));
+ else
+ return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
}
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_uppercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_toupper(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
}
static pg_wchar
pg_wc_tolower(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_lowercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_tolower(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
}
@@ -729,37 +518,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
* would always be true for production values of MAX_SIMPLE_CHR, but it's
* useful to allow it to be small for testing purposes.)
*/
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
#if MAX_SIMPLE_CHR >= 127
- max_chr = (pg_wchar) 127;
- pcc->cv.cclasscode = -1;
+ max_chr = (pg_wchar) 127;
+ pcc->cv.cclasscode = -1;
#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+ max_chr = (pg_wchar) MAX_SIMPLE_CHR;
#endif
- break;
- case PG_REGEX_STRATEGY_BUILTIN:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
- max_chr = (pg_wchar) UCHAR_MAX;
+ }
+ else
+ {
+ if (pg_regex_locale->ctype->max_chr != 0 &&
+ pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+ {
+ max_chr = pg_regex_locale->ctype->max_chr;
pcc->cv.cclasscode = -1;
-#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
- break;
- case PG_REGEX_STRATEGY_ICU:
+ }
+ else
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- default:
- Assert(false);
- max_chr = 0; /* can't get here, but keep compiler quiet */
- break;
}
/*
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7f4cf614585..4216ac17f43 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
else if (locale->is_default)
return pg_tolower(c);
else
- return tolower_l(c, locale->info.lt);
+ return char_tolower(c, locale);
}
@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
* way.
*/
- if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+ if (locale->ctype_is_c ||
+ (char_tolower_enabled(locale) &&
+ pg_database_encoding_max_length() == 1))
+ {
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ return SB_IMatchText(s, slen, p, plen, locale);
+ }
+ else
{
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
else
return MB_MatchText(s, slen, p, plen, 0);
}
- else
- {
- p = VARDATA_ANY(pat);
- plen = VARSIZE_ANY_EXHDR(pat);
- s = VARDATA_ANY(str);
- slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale);
- }
}
/*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..999f23f86d5 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
{
if (locale->ctype_is_c)
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else if (is_multibyte && IS_HIGHBIT_SET(c))
- return true;
- else if (locale->provider != COLLPROVIDER_LIBC)
- return IS_HIGHBIT_SET(c) ||
- (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else
- return isalpha_l((unsigned char) c, locale->info.lt);
+ return char_is_cased(c, locale);
}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 875cca6efc8..cdb4950ac47 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -100,27 +100,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern char *get_collation_actual_version_libc(const char *collcollate);
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
/* GUC settings */
char *locale_messages;
char *locale_monetary;
@@ -1232,6 +1211,9 @@ create_pg_locale(Oid collid, MemoryContext context)
Assert((result->collate_is_c && result->collate == NULL) ||
(!result->collate_is_c && result->collate != NULL));
+ Assert((result->ctype_is_c && result->ctype == NULL) ||
+ (!result->ctype_is_c && result->ctype != NULL));
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
&isnull);
if (!isnull)
@@ -1394,57 +1376,21 @@ size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strtitle_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strupper_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
/*
@@ -1581,6 +1527,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
}
+/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+ return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_tolower(ch, locale);
+}
+
/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 5161915e6b1..aa7d0e3d6cb 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -25,13 +25,6 @@
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
struct WordBoundaryState
{
@@ -74,14 +67,14 @@ initcap_wbnext(void *state)
return wbstate->len;
}
-size_t
+static size_t
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
return unicode_strlower(dest, destsize, src, srclen);
}
-size_t
+static size_t
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -97,13 +90,104 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
initcap_wbnext, &wbstate);
}
-size_t
+static size_t
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
return unicode_strupper(dest, destsize, src, srclen);
}
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isdigit(wc, true);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalnum(wc, true);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_ispunct(wc, true);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+ .strlower = strlower_builtin,
+ .strtitle = strtitle_builtin,
+ .strupper = strupper_builtin,
+ .wc_isdigit = wc_isdigit_builtin,
+ .wc_isalpha = wc_isalpha_builtin,
+ .wc_isalnum = wc_isalnum_builtin,
+ .wc_isupper = wc_isupper_builtin,
+ .wc_islower = wc_islower_builtin,
+ .wc_isgraph = wc_isgraph_builtin,
+ .wc_isprint = wc_isprint_builtin,
+ .wc_ispunct = wc_ispunct_builtin,
+ .wc_isspace = wc_isspace_builtin,
+ .char_is_cased = char_is_cased_builtin,
+ .wc_tolower = wc_tolower_builtin,
+ .wc_toupper = wc_toupper_builtin,
+};
+
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
{
@@ -146,6 +230,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
+ if (!result->ctype_is_c)
+ result->ctype = &ctype_methods_builtin;
return result;
}
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 5185b0f7289..3e9a2e0cfaa 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -48,17 +48,17 @@
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
extern UCollator *pg_ucol_open(const char *loc_str);
+static size_t strlower_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dst, size_t dstsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
static int strncoll_icu(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@@ -118,6 +118,25 @@ static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_tolower(wc);
+}
+
static const struct collate_methods collate_methods_icu = {
.strncoll = strncoll_icu,
.strnxfrm = strnxfrm_icu,
@@ -136,6 +155,77 @@ static const struct collate_methods collate_methods_icu_utf8 = {
.strxfrm_is_safe = true,
};
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+ .strlower = strlower_icu,
+ .strtitle = strtitle_icu,
+ .strupper = strupper_icu,
+ .wc_isdigit = wc_isdigit_icu,
+ .wc_isalpha = wc_isalpha_icu,
+ .wc_isalnum = wc_isalnum_icu,
+ .wc_isupper = wc_isupper_icu,
+ .wc_islower = wc_islower_icu,
+ .wc_isgraph = wc_isgraph_icu,
+ .wc_isprint = wc_isprint_icu,
+ .wc_ispunct = wc_ispunct_icu,
+ .wc_isspace = wc_isspace_icu,
+ .char_is_cased = char_is_cased_icu,
+ .wc_toupper = toupper_icu,
+ .wc_tolower = tolower_icu,
+};
#endif
pg_locale_t
@@ -206,6 +296,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result->collate = &collate_methods_icu_utf8;
else
result->collate = &collate_methods_icu;
+ result->ctype = &ctype_methods_icu;
return result;
#else
@@ -379,7 +470,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
}
}
-size_t
+static size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -399,7 +490,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -419,7 +510,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 8f9a8637897..1144c6ff304 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -43,13 +43,6 @@
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
static int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@@ -86,6 +79,239 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+ Assert(pg_database_encoding_max_length() == 1);
+ return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+ bool is_multibyte = pg_database_encoding_max_length() > 1;
+
+ if (is_multibyte && IS_HIGHBIT_SET(ch))
+ return true;
+ else
+ return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return toupper_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towupper_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return tolower_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towlower_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+ .strlower = strlower_libc_sb,
+ .strtitle = strtitle_libc_sb,
+ .strupper = strupper_libc_sb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_mb,
+ .wc_isalpha = wc_isalpha_libc_mb,
+ .wc_isalnum = wc_isalnum_libc_mb,
+ .wc_isupper = wc_isupper_libc_mb,
+ .wc_islower = wc_islower_libc_mb,
+ .wc_isgraph = wc_isgraph_libc_mb,
+ .wc_isprint = wc_isprint_libc_mb,
+ .wc_ispunct = wc_ispunct_libc_mb,
+ .wc_isspace = wc_isspace_libc_mb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_mb,
+ .wc_tolower = tolower_libc_mb,
+};
+
static const struct collate_methods collate_methods_libc = {
.strncoll = strncoll_libc,
.strnxfrm = strnxfrm_libc,
@@ -120,36 +346,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
};
#endif
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strlower_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strupper_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
@@ -482,6 +678,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
#endif
result->collate = &collate_methods_libc;
}
+ if (!result->ctype_is_c)
+ {
+ if (GetDatabaseEncoding() == PG_UTF8)
+ result->ctype = &ctype_methods_libc_utf8;
+ else if (pg_database_encoding_max_length() > 1)
+ result->ctype = &ctype_methods_libc_other_mb;
+ else
+ result->ctype = &ctype_methods_libc_sb;
+ }
return result;
}
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index ec42ca3da4c..b64135ab389 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -12,6 +12,8 @@
#ifndef _PG_LOCALE_
#define _PG_LOCALE_
+#include "mb/pg_wchar.h"
+
#ifdef USE_ICU
#include <unicode/ucol.h>
#endif
@@ -77,6 +79,49 @@ struct collate_methods
bool strxfrm_is_safe;
};
+struct ctype_methods
+{
+ /* case mapping: LOWER()/INITCAP()/UPPER() */
+ size_t (*strlower) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+ size_t (*strtitle) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+ size_t (*strupper) (char *dest, size_t destsize,
+ const char *src, ssize_t srclen,
+ pg_locale_t locale);
+
+ /* required */
+ bool (*wc_isdigit) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isalpha) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isalnum) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isupper) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_islower) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isgraph) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isprint) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_ispunct) (pg_wchar wc, pg_locale_t locale);
+ bool (*wc_isspace) (pg_wchar wc, pg_locale_t locale);
+ pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale);
+ pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale);
+
+ /* required */
+ bool (*char_is_cased) (char ch, pg_locale_t locale);
+
+ /*
+ * Optional. If defined, will only be called for single-byte encodings. If
+ * not defined, or if the encoding is multibyte, will fall back to
+ * pg_strlower().
+ */
+ char (*char_tolower) (unsigned char ch, pg_locale_t locale);
+
+ /*
+ * For regex and pattern matching efficiency, the maximum char value
+ * supported by the above methods. If zero, limit is set by regex code.
+ */
+ pg_wchar max_chr;
+};
+
/*
* We use a discriminated union to hold either a locale_t or an ICU collator.
* pg_locale_t is occasionally checked for truth, so make it a pointer.
@@ -102,6 +147,7 @@ struct pg_locale_struct
bool is_default;
const struct collate_methods *collate; /* NULL if collate_is_c */
+ const struct ctype_methods *ctype; /* NULL if ctype_is_c */
union
{
@@ -124,6 +170,9 @@ extern void init_database_collation(void);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
+extern bool char_is_cased(char ch, pg_locale_t locale);
+extern bool char_tolower_enabled(pg_locale_t locale);
+extern char char_tolower(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 9f83ecf181f..a869d6b7283 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1826,7 +1826,6 @@ PGTargetServerType
PGTernaryBool
PGTransactionStatusType
PGVerbosity
-PG_Locale_Strategy
PG_Lock_Status
PG_init_t
PGcancel
--
2.34.1
From c7f7159cdd31cc1f10ac35e43afc39b2074ecefe Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 7 Oct 2024 12:51:27 -0700
Subject: [PATCH v12 2/4] Remove provider field from pg_locale_t.
The behavior of pg_locale_t is entirely specified by methods, so a
separate provider field is no longer necessary.
---
src/backend/utils/adt/pg_locale_builtin.c | 1 -
src/backend/utils/adt/pg_locale_icu.c | 11 -----------
src/backend/utils/adt/pg_locale_libc.c | 6 ------
src/include/utils/pg_locale.h | 1 -
4 files changed, 19 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index aa7d0e3d6cb..4db21882ac3 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -226,7 +226,6 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->info.builtin.locale = MemoryContextStrdup(context, locstr);
- result->provider = COLLPROVIDER_BUILTIN;
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 3e9a2e0cfaa..e4f0398c217 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -288,7 +288,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
result->info.icu.ucol = collator;
- result->provider = COLLPROVIDER_ICU;
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
@@ -545,8 +544,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
int result;
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
status = U_ZERO_ERROR;
@@ -574,8 +571,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
init_icu_converter();
ulen = uchar_length(icu_converter, src, srclen);
@@ -620,8 +615,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
uint32_t state[2];
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
uiter_setUTF8(&iter, src, srclen);
@@ -788,8 +781,6 @@ strncoll_icu(const char *arg1, ssize_t len1,
*uchar2;
int result;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
#ifdef HAVE_UCOL_STRCOLLUTF8
Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -838,8 +829,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
Assert(GetDatabaseEncoding() != PG_UTF8);
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 1144c6ff304..1582f8cdd2a 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -662,7 +662,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
loc = make_libc_collator(collate, ctype);
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
- result->provider = COLLPROVIDER_LIBC;
result->deterministic = true;
result->collate_is_c = (strcmp(collate, "C") == 0) ||
(strcmp(collate, "POSIX") == 0);
@@ -782,8 +781,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
const char *arg2n;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (bufsize1 + bufsize2 > TEXTBUFLEN)
buf = palloc(bufsize1 + bufsize2);
@@ -838,8 +835,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t bufsize = srclen + 1;
size_t result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (srclen == -1)
return strxfrm_l(dest, src, destsize, locale->info.lt);
@@ -948,7 +943,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
int r;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
Assert(GetDatabaseEncoding() == PG_UTF8);
if (len1 == -1)
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index b64135ab389..d9650cec5cc 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -140,7 +140,6 @@ struct ctype_methods
*/
struct pg_locale_struct
{
- char provider;
bool deterministic;
bool collate_is_c;
bool ctype_is_c;
--
2.34.1
From ad4371e6f641479275dbb21cda7d615393831271 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Mon, 7 Oct 2024 13:36:44 -0700
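Subject: [PATCH v12 3/4] Make provider data in pg_locale_t an opaque pointer.
Each provider now keeps its state in a private struct (builtin_provider,
icu_provider, libc_provider) that is allocated in the same memory context
as the pg_locale_t and reached through locale->provider_data, so the
layout of that data is private to each provider's source file.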
---
src/backend/utils/adt/pg_locale_builtin.c | 11 +-
src/backend/utils/adt/pg_locale_icu.c | 40 ++++--
src/backend/utils/adt/pg_locale_libc.c | 149 +++++++++++++++-------
src/include/utils/pg_locale.h | 16 +--
4 files changed, 143 insertions(+), 73 deletions(-)
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index 4db21882ac3..77768735149 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -26,6 +26,11 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
+struct builtin_provider
+{
+ const char *locale;
+};
+
struct WordBoundaryState
{
const char *str;
@@ -192,6 +197,7 @@ pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
{
const char *locstr;
+ struct builtin_provider *builtin;
pg_locale_t result;
if (collid == DEFAULT_COLLATION_OID)
@@ -225,7 +231,10 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
- result->info.builtin.locale = MemoryContextStrdup(context, locstr);
+ builtin = MemoryContextAllocZero(context, sizeof(struct builtin_provider));
+ builtin->locale = MemoryContextStrdup(context, locstr);
+ result->provider_data = (void *) builtin;
+
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index e4f0398c217..7bd58f26c44 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -51,6 +51,12 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
#ifdef USE_ICU
+struct icu_provider
+{
+ const char *locale;
+ UCollator *ucol;
+};
+
extern UCollator *pg_ucol_open(const char *loc_str);
static size_t strlower_icu(char *dst, size_t dstsize, const char *src,
@@ -235,6 +241,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
bool deterministic;
const char *iculocstr;
const char *icurules = NULL;
+ struct icu_provider *icu;
UCollator *collator;
pg_locale_t result;
@@ -286,8 +293,12 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
collator = make_icu_collator(iculocstr, icurules);
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
- result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
- result->info.icu.ucol = collator;
+
+ icu = MemoryContextAllocZero(context, sizeof(struct icu_provider));
+ icu->locale = MemoryContextStrdup(context, iculocstr);
+ icu->ucol = collator;
+ result->provider_data = (void *) icu;
+
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
@@ -543,11 +554,12 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
{
int result;
UErrorCode status;
+ struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
Assert(GetDatabaseEncoding() == PG_UTF8);
status = U_ZERO_ERROR;
- result = ucol_strcollUTF8(locale->info.icu.ucol,
+ result = ucol_strcollUTF8(icu->ucol,
arg1, len1,
arg2, len2,
&status);
@@ -571,6 +583,8 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t uchar_bsize;
Size result_bsize;
+ struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
init_icu_converter();
ulen = uchar_length(icu_converter, src, srclen);
@@ -584,7 +598,7 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
ulen = uchar_convert(icu_converter, uchar, ulen + 1, src, srclen);
- result_bsize = ucol_getSortKey(locale->info.icu.ucol,
+ result_bsize = ucol_getSortKey(icu->ucol,
uchar, ulen,
(uint8_t *) dest, destsize);
@@ -615,12 +629,14 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
uint32_t state[2];
UErrorCode status;
+ struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() == PG_UTF8);
uiter_setUTF8(&iter, src, srclen);
state[0] = state[1] = 0; /* won't need that again */
status = U_ZERO_ERROR;
- result = ucol_nextSortKeyPart(locale->info.icu.ucol,
+ result = ucol_nextSortKeyPart(icu->ucol,
&iter,
state,
(uint8_t *) dest,
@@ -727,11 +743,13 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
UErrorCode status;
int32_t len_dest;
+ struct icu_provider *icu = (struct icu_provider *) mylocale->provider_data;
+
len_dest = len_source; /* try first with same length */
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
- mylocale->info.icu.locale, &status);
+ icu->locale, &status);
if (status == U_BUFFER_OVERFLOW_ERROR)
{
/* try again with adjusted length */
@@ -739,7 +757,7 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
- mylocale->info.icu.locale, &status);
+ icu->locale, &status);
}
if (U_FAILURE(status))
ereport(ERROR,
@@ -781,6 +799,8 @@ strncoll_icu(const char *arg1, ssize_t len1,
*uchar2;
int result;
+ struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
#ifdef HAVE_UCOL_STRCOLLUTF8
Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -803,7 +823,7 @@ strncoll_icu(const char *arg1, ssize_t len1,
ulen1 = uchar_convert(icu_converter, uchar1, ulen1 + 1, arg1, len1);
ulen2 = uchar_convert(icu_converter, uchar2, ulen2 + 1, arg2, len2);
- result = ucol_strcoll(locale->info.icu.ucol,
+ result = ucol_strcoll(icu->ucol,
uchar1, ulen1,
uchar2, ulen2);
@@ -829,6 +849,8 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
size_t uchar_bsize;
Size result_bsize;
+ struct icu_provider *icu = (struct icu_provider *) locale->provider_data;
+
/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -848,7 +870,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
uiter_setString(&iter, uchar, ulen);
state[0] = state[1] = 0; /* won't need that again */
status = U_ZERO_ERROR;
- result_bsize = ucol_nextSortKeyPart(locale->info.icu.ucol,
+ result_bsize = ucol_nextSortKeyPart(icu->ucol,
&iter,
state,
(uint8_t *) dest,
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 1582f8cdd2a..1d990a612b4 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -1,3 +1,4 @@
+
/*-----------------------------------------------------------------------
*
* PostgreSQL locale utilities for libc
@@ -41,6 +42,11 @@
*/
#define TEXTBUFLEN 1024
+struct libc_provider
+{
+ locale_t lt;
+};
+
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
static int strncoll_libc(const char *arg1, ssize_t len1,
@@ -82,116 +88,136 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
static bool
wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isdigit_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isdigit_l((unsigned char) wc, libc->lt);
}
static bool
wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isalpha_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isalpha_l((unsigned char) wc, libc->lt);
}
static bool
wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isalnum_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isalnum_l((unsigned char) wc, libc->lt);
}
static bool
wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isupper_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isupper_l((unsigned char) wc, libc->lt);
}
static bool
wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return islower_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return islower_l((unsigned char) wc, libc->lt);
}
static bool
wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isgraph_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isgraph_l((unsigned char) wc, libc->lt);
}
static bool
wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isprint_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isprint_l((unsigned char) wc, libc->lt);
}
static bool
wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return ispunct_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return ispunct_l((unsigned char) wc, libc->lt);
}
static bool
wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
{
- return isspace_l((unsigned char) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return isspace_l((unsigned char) wc, libc->lt);
}
static bool
wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswdigit_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswdigit_l((wint_t) wc, libc->lt);
}
static bool
wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswalpha_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswalpha_l((wint_t) wc, libc->lt);
}
static bool
wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswalnum_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswalnum_l((wint_t) wc, libc->lt);
}
static bool
wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswupper_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswupper_l((wint_t) wc, libc->lt);
}
static bool
wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswlower_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswlower_l((wint_t) wc, libc->lt);
}
static bool
wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswgraph_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswgraph_l((wint_t) wc, libc->lt);
}
static bool
wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswprint_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswprint_l((wint_t) wc, libc->lt);
}
static bool
wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswpunct_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswpunct_l((wint_t) wc, libc->lt);
}
static bool
wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
{
- return iswspace_l((wint_t) wc, locale->info.lt);
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+ return iswspace_l((wint_t) wc, libc->lt);
}
static char
char_tolower_libc(unsigned char ch, pg_locale_t locale)
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(pg_database_encoding_max_length() == 1);
- return tolower_l(ch, locale->info.lt);
+ return tolower_l(ch, libc->lt);
}
static bool
@@ -199,19 +225,23 @@ char_is_cased_libc(char ch, pg_locale_t locale)
{
bool is_multibyte = pg_database_encoding_max_length() > 1;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
if (is_multibyte && IS_HIGHBIT_SET(ch))
return true;
else
- return isalpha_l((unsigned char) ch, locale->info.lt);
+ return isalpha_l((unsigned char) ch, libc->lt);
}
static pg_wchar
toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() != PG_UTF8);
if (wc <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) wc, locale->info.lt);
+ return toupper_l((unsigned char) wc, libc->lt);
else
return wc;
}
@@ -219,10 +249,12 @@ toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
static pg_wchar
toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() == PG_UTF8);
if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) wc, locale->info.lt);
+ return towupper_l((wint_t) wc, libc->lt);
else
return wc;
}
@@ -230,10 +262,12 @@ toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
static pg_wchar
tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() != PG_UTF8);
if (wc <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) wc, locale->info.lt);
+ return tolower_l((unsigned char) wc, libc->lt);
else
return wc;
}
@@ -241,10 +275,12 @@ tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
static pg_wchar
tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() == PG_UTF8);
if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) wc, locale->info.lt);
+ return towlower_l((wint_t) wc, libc->lt);
else
return wc;
}
@@ -355,7 +391,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (srclen + 1 <= destsize)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
char *p;
if (srclen + 1 > destsize)
@@ -376,7 +412,7 @@ strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (locale->is_default)
*p = pg_tolower((unsigned char) *p);
else
- *p = tolower_l((unsigned char) *p, loc);
+ *p = tolower_l((unsigned char) *p, libc->lt);
}
}
@@ -387,7 +423,8 @@ static size_t
strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
size_t result_size;
wchar_t *workspace;
char *result;
@@ -409,7 +446,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
char2wchar(workspace, srclen + 1, src, srclen, locale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+ workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt);
/*
* Make result large enough; case change might change number of bytes
@@ -440,7 +477,7 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (srclen + 1 <= destsize)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
int wasalnum = false;
char *p;
@@ -466,11 +503,11 @@ strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
else
{
if (wasalnum)
- *p = tolower_l((unsigned char) *p, loc);
+ *p = tolower_l((unsigned char) *p, libc->lt);
else
- *p = toupper_l((unsigned char) *p, loc);
+ *p = toupper_l((unsigned char) *p, libc->lt);
}
- wasalnum = isalnum_l((unsigned char) *p, loc);
+ wasalnum = isalnum_l((unsigned char) *p, libc->lt);
}
}
@@ -481,7 +518,8 @@ static size_t
strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
int wasalnum = false;
size_t result_size;
wchar_t *workspace;
@@ -506,10 +544,10 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
if (wasalnum)
- workspace[curr_char] = towlower_l(workspace[curr_char], loc);
+ workspace[curr_char] = towlower_l(workspace[curr_char], libc->lt);
else
- workspace[curr_char] = towupper_l(workspace[curr_char], loc);
- wasalnum = iswalnum_l(workspace[curr_char], loc);
+ workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt);
+ wasalnum = iswalnum_l(workspace[curr_char], libc->lt);
}
/*
@@ -541,7 +579,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (srclen + 1 <= destsize)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
char *p;
memcpy(dest, src, srclen);
@@ -559,7 +597,7 @@ strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
if (locale->is_default)
*p = pg_toupper((unsigned char) *p);
else
- *p = toupper_l((unsigned char) *p, loc);
+ *p = toupper_l((unsigned char) *p, libc->lt);
}
}
@@ -570,7 +608,8 @@ static size_t
strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- locale_t loc = locale->info.lt;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
size_t result_size;
wchar_t *workspace;
char *result;
@@ -592,7 +631,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
char2wchar(workspace, srclen + 1, src, srclen, locale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
- workspace[curr_char] = towupper_l(workspace[curr_char], loc);
+ workspace[curr_char] = towupper_l(workspace[curr_char], libc->lt);
/*
* Make result large enough; case change might change number of bytes
@@ -620,6 +659,7 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
const char *collate;
const char *ctype;
locale_t loc;
+ struct libc_provider *libc;
pg_locale_t result;
if (collid == DEFAULT_COLLATION_OID)
@@ -658,16 +698,19 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
ReleaseSysCache(tp);
}
-
loc = make_libc_collator(collate, ctype);
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
+
+ libc = MemoryContextAllocZero(context, sizeof(struct libc_provider));
+ libc->lt = loc;
+ result->provider_data = (void *) libc;
+
result->deterministic = true;
result->collate_is_c = (strcmp(collate, "C") == 0) ||
(strcmp(collate, "POSIX") == 0);
result->ctype_is_c = (strcmp(ctype, "C") == 0) ||
(strcmp(ctype, "POSIX") == 0);
- result->info.lt = loc;
if (!result->collate_is_c)
{
#ifdef WIN32
@@ -781,6 +824,8 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
const char *arg2n;
int result;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
if (bufsize1 + bufsize2 > TEXTBUFLEN)
buf = palloc(bufsize1 + bufsize2);
@@ -811,7 +856,7 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
arg2n = buf2;
}
- result = strcoll_l(arg1n, arg2n, locale->info.lt);
+ result = strcoll_l(arg1n, arg2n, libc->lt);
if (buf != sbuf)
pfree(buf);
@@ -835,8 +880,10 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t bufsize = srclen + 1;
size_t result;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
if (srclen == -1)
- return strxfrm_l(dest, src, destsize, locale->info.lt);
+ return strxfrm_l(dest, src, destsize, libc->lt);
if (bufsize > TEXTBUFLEN)
buf = palloc(bufsize);
@@ -845,7 +892,7 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
memcpy(buf, src, srclen);
buf[srclen] = '\0';
- result = strxfrm_l(dest, buf, destsize, locale->info.lt);
+ result = strxfrm_l(dest, buf, destsize, libc->lt);
if (buf != sbuf)
pfree(buf);
@@ -943,6 +990,8 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
int r;
int result;
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
Assert(GetDatabaseEncoding() == PG_UTF8);
if (len1 == -1)
@@ -987,7 +1036,7 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
((LPWSTR) a2p)[r] = 0;
errno = 0;
- result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
+ result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, libc->lt);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
@@ -1116,8 +1165,10 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
/* Use wcstombs_l for nondefault locales */
- result = wcstombs_l(to, from, tolen, locale->info.lt);
+ result = wcstombs_l(to, from, tolen, libc->lt);
}
return result;
@@ -1176,8 +1227,10 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
}
else
{
+ struct libc_provider *libc = (struct libc_provider *) locale->provider_data;
+
/* Use mbstowcs_l for nondefault locales */
- result = mbstowcs_l(to, str, tolen, locale->info.lt);
+ result = mbstowcs_l(to, str, tolen, libc->lt);
}
pfree(str);
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index d9650cec5cc..74dd8435a6b 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -148,21 +148,7 @@ struct pg_locale_struct
const struct collate_methods *collate; /* NULL if collate_is_c */
const struct ctype_methods *ctype; /* NULL if ctype_is_c */
- union
- {
- struct
- {
- const char *locale;
- } builtin;
- locale_t lt;
-#ifdef USE_ICU
- struct
- {
- const char *locale;
- UCollator *ucol;
- } icu;
-#endif
- } info;
+ void *provider_data;
};
extern void init_database_collation(void);
--
2.34.1
From 95c70ad9d8a2f90967a5d62b276d96756dfae172 Mon Sep 17 00:00:00 2001
From: Jeff Davis <[email protected]>
Date: Wed, 9 Oct 2024 10:00:58 -0700
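Subject: [PATCH v12 4/4] Don't include ICU headers in pg_locale.h.

Now that struct pg_locale_struct keeps provider-specific state behind an
opaque provider_data pointer rather than exposing UCollator directly,
pg_locale.h no longer needs <unicode/ucol.h>. Include the ICU headers in
the source files that need them instead, and drop the
<unicode/ustring.h> include from formatting.c.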
---
src/backend/commands/collationcmds.c | 4 ++++
src/backend/utils/adt/formatting.c | 4 ----
src/backend/utils/adt/pg_locale.c | 4 ++++
src/backend/utils/adt/pg_locale_icu.c | 1 +
src/backend/utils/adt/varlena.c | 4 ++++
src/include/utils/pg_locale.h | 4 ----
6 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 8acbfbbeda0..a57fe93c387 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -14,6 +14,10 @@
*/
#include "postgres.h"
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
#include "access/htup_details.h"
#include "access/table.h"
#include "access/xact.h"
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 3960235e14e..2ba4ca7f0f2 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -71,10 +71,6 @@
#include <limits.h>
#include <wctype.h>
-#ifdef USE_ICU
-#include <unicode/ustring.h>
-#endif
-
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
#include "common/int.h"
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index cdb4950ac47..e3ddec2d57d 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -54,6 +54,10 @@
#include <time.h>
+#ifdef USE_ICU
+#include <unicode/ucol.h>
+#endif
+
#include "access/htup_details.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_database.h"
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 7bd58f26c44..0469c52b669 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -13,6 +13,7 @@
#ifdef USE_ICU
#include <unicode/ucnv.h>
+#include <unicode/ucol.h>
#include <unicode/ustring.h>
/*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 34796f2e27c..c57262e1888 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -17,6 +17,10 @@
#include <ctype.h>
#include <limits.h>
+#ifdef USE_ICU
+#include <unicode/uchar.h>
+#endif
+
#include "access/detoast.h"
#include "access/toast_compression.h"
#include "catalog/pg_collation.h"
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 74dd8435a6b..acb4890a78a 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -14,10 +14,6 @@
#include "mb/pg_wchar.h"
-#ifdef USE_ICU
-#include <unicode/ucol.h>
-#endif
-
/* use for libc locale names */
#define LOCALE_NAME_BUFLEN 128
--
2.34.1