On Sun, Jun 12, 2022 at 11:59 AM Thomas Munro <thomas.mu...@gmail.com> wrote: > > On Sat, Jun 11, 2022 at 4:21 PM Peter Geoghegan <p...@bowt.ie> wrote: > > What about "time travel collations", but without the time travel part? > > That is, what about supporting multiple ICU versions per cluster, but > > not per database? So you could upgrade the OS and Postgres, using > > standard packages that typically just use the latest ICU version -- > > typically, but not always. If you happen to have been on an older > > version of ICU on upgrade, then that version of ICU will still work at > > the level of a whole database -- your database. Maybe you can create > > new databases with old and new ICU versions if you want to. > > > > That obviously runs into the problem of needing to eventually do a > > dump and reload -- but I suppose that "eventually" could be a very > > long time. At least the OS package doesn't declare one version of ICU > > the blessed version, now and forever, effectively vendoring ICU in a > > backdoor fashion. At least old databases have significant runway, > > while at the same time new databases that want to use the same > > standard Postgres package aren't forced to use the same ancient ICU > > version. > > Hmm. I think that's effectively what you'd get using my "distinct > collation" patch (v1, or this much better v3, attached), if you put > version prefixes in colliculocale, and updated them in the template > database after an OS upgrade to affect new databases. I realise you > probably mean something a little more automatic...
Thinking some more about what you said above: really, most people only care about the default collation. I'm not yet sure what I think initdb should put into pg_collation when importing the initial set of collation objects in the "distinct" world (perhaps an un-prefixed and a prefixed variant of each, with names ending -x-icu and -x-icu63?), but as for the default collation, I should point out that the "distinct" patch already gives you a nailed-to-the-ground database approximately as you described above if you just do something like this: postgres=# create database db2 locale_provider = icu icu_locale = '67:en' template = template0 ...; Small bugfix attached (v3 was accidentally calling uiter_setUTF8() and u_errorName() directly in a couple of places).
From 265d6ea0d292255c164015814f4e810186d2ffb3 Mon Sep 17 00:00:00 2001 From: Thomas Munro <thomas.mu...@gmail.com> Date: Wed, 8 Jun 2022 17:43:53 +1200 Subject: [PATCH v4] WIP: multi ICU, distinct collations XXX This is highly experimental code --- src/backend/access/hash/hashfunc.c | 16 +- src/backend/utils/adt/formatting.c | 53 ++++- src/backend/utils/adt/pg_locale.c | 360 ++++++++++++++++++++++++++++- src/backend/utils/adt/varchar.c | 16 +- src/backend/utils/adt/varlena.c | 56 ++--- src/include/utils/pg_locale.h | 72 ++++++ src/tools/pgindent/typedefs.list | 3 + 7 files changed, 510 insertions(+), 66 deletions(-) diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index b57ed946c4..0a61538efd 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -298,11 +298,11 @@ hashtext(PG_FUNCTION_ARGS) ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, NULL, 0); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); + PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, buf, bsize); result = hash_any(buf, bsize); @@ -355,11 +355,11 @@ hashtextextended(PG_FUNCTION_ARGS) ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, NULL, 0); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); + PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, buf, bsize); result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1)); diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index e909c1a200..97c96cb7f6 100644 --- 
a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1602,6 +1602,11 @@ typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity, const UChar *src, int32_t srcLength, const char *locale, UErrorCode *pErrorCode); +typedef int32_t (*ICU_Convert_BI_Func) (UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *bi, + const char *locale, + UErrorCode *pErrorCode); static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, @@ -1626,18 +1631,41 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale, } if (U_FAILURE(status)) ereport(ERROR, - (errmsg("case conversion failed: %s", u_errorName(status)))); + (errmsg("case conversion failed: %s", + PG_ICU_LIB(mylocale)->errorName(status)))); return len_dest; } +/* + * Like icu_convert_case, but func takes a break iterator (which we don't + * make use of). + */ static int32_t -u_strToTitle_default_BI(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode) +icu_convert_case_bi(ICU_Convert_BI_Func func, pg_locale_t mylocale, + UChar **buff_dest, UChar *buff_source, int32_t len_source) { - return u_strToTitle(dest, destCapacity, src, srcLength, - NULL, locale, pErrorCode); + UErrorCode status; + int32_t len_dest; + + len_dest = len_source; /* try first with same length */ + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL, + mylocale->info.icu.locale, &status); + if (status == U_BUFFER_OVERFLOW_ERROR) + { + /* try again with adjusted length */ + pfree(*buff_dest); + *buff_dest = palloc(len_dest * sizeof(**buff_dest)); + status = U_ZERO_ERROR; + len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL, + mylocale->info.icu.locale, &status); + } + if (U_FAILURE(status)) + ereport(ERROR, + (errmsg("case conversion failed: %s", + 
PG_ICU_LIB(mylocale)->errorName(status)))); + return len_dest; } #endif /* USE_ICU */ @@ -1705,7 +1733,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid) UChar *buff_conv; len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToLower, mylocale, + len_conv = icu_convert_case(PG_ICU_LIB(mylocale)->strToLower, + mylocale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); @@ -1827,7 +1856,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) UChar *buff_conv; len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToUpper, mylocale, + len_conv = icu_convert_case(PG_ICU_LIB(mylocale)->strToUpper, + mylocale, &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); @@ -1950,8 +1980,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) UChar *buff_conv; len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes); - len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale, - &buff_conv, buff_uchar, len_uchar); + len_conv = icu_convert_case_bi(PG_ICU_LIB(mylocale)->strToTitle, + mylocale, + &buff_conv, buff_uchar, len_uchar); icu_from_uchar(&result, buff_conv, len_conv); pfree(buff_uchar); pfree(buff_conv); diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a0490a7522..8dec0ff8c9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -58,6 +58,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_control.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" #include "utils/builtins.h" #include "utils/formatting.h" #include "utils/hsearch.h" @@ -68,6 +69,7 @@ #ifdef USE_ICU #include <unicode/ucnv.h> +#include <unicode/ustring.h> #endif #ifdef __GLIBC__ @@ -78,8 +80,27 @@ #include <shlwapi.h> #endif +#ifdef HAVE_DLOPEN +#include <dlfcn.h> +#endif + #define MAX_L10N_DATA 80 +#ifdef USE_ICU + +/* + * We don't want to 
call into dlopen'd ICU libraries that are newer than the + * one we were compiled and linked against, just in case there is an + * incompatible API change. + */ +#define PG_MAX_ICU_MAJOR_VERSION U_ICU_VERSION_MAJOR_NUM + +/* An old ICU release that we know has the right API. */ +/* XXX check if this is actually true, and if we could go back further */ +#define PG_MIN_ICU_MAJOR_VERSION 54 + +#endif + /* GUC settings */ char *locale_messages; @@ -1435,29 +1456,337 @@ lc_ctype_is_c(Oid collation) return (lookup_collation_cache(collation, true))->ctype_is_c; } +#ifdef USE_ICU + struct pg_locale_struct default_locale; +/* Linked list of ICU libraries we have loaded. */ +static pg_icu_library *icu_library_list = NULL; + +/* + * Free an ICU library. pg_icu_library objects that are successfully + * constructed stick around for the lifetime of the backend, but this is used + * to clean up if initialization fails. + */ +static void +free_icu_library(pg_icu_library *lib) +{ + if (lib->libicui18n_handle) + dlclose(lib->libicui18n_handle); + if (lib->libicuuc_handle) + dlclose(lib->libicuuc_handle); + pfree(lib); +} + +static void * +get_icu_function(void *handle, const char *function, int version) +{ + char name[80]; + + snprintf(name, sizeof(name), "%s_%d", function, version); + + return dlsym(handle, name); +} + +/* + * Probe a dynamically loaded library to see which major version of ICU it + * contains. + */ +static int +get_icu_library_major_version(void *handle) +{ + for (int i = PG_MIN_ICU_MAJOR_VERSION; i <= PG_MAX_ICU_MAJOR_VERSION; ++i) + if (get_icu_function(handle, "ucol_open", i) || + get_icu_function(handle, "u_strToUpper", i)) + return i; + + /* + * It's a later version we don't dare use, an old version we don't + * support, an ICU build with symbol suffixes disabled, or not ICU. + */ + return -1; +} + +/* + * We have to load a couple of different libraries, so we'll reuse the code to + * do that. 
+ */ +static void * +load_icu_library(pg_icu_library *lib, const char *name) +{ + void *handle; + int found_major_version; + + handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL); + if (handle == NULL) + { + int errno_save = errno; + + free_icu_library(lib); + errno = errno_save; + + ereport(ERROR, + (errmsg("could not load library \"%s\": %m", name))); + } + + found_major_version = get_icu_library_major_version(handle); + if (found_major_version < 0) + { + free_icu_library(lib); + ereport(ERROR, + (errmsg("could not find compatible ICU major version in library \"%s\"", + name))); + } + + if (found_major_version != lib->major_version) + { + free_icu_library(lib); + ereport(ERROR, + (errmsg("expected to find ICU major version %d in library \"%s\", but found %d", + lib->major_version, name, found_major_version))); + } + + return handle; +} + +/* + * Given an ICU major version number, return the object we need to access it, + * or fail while trying to load it. + */ +static pg_icu_library * +get_icu_library(int major_version) +{ + pg_icu_library *lib; + + Assert(major_version >= PG_MIN_ICU_MAJOR_VERSION && + major_version <= PG_MAX_ICU_MAJOR_VERSION); + + /* Try to find it in our list of existing libraries. */ + for (lib = icu_library_list; lib; lib = lib->next) + if (lib->major_version == major_version) + return lib; + + /* Make a new entry. */ + lib = MemoryContextAllocZero(TopMemoryContext, sizeof(*lib)); + if (major_version == U_ICU_VERSION_MAJOR_NUM) + { + /* + * This is the version we were compiled and linked against. Simply + * assign the function pointers. + * + * These assignments will fail to compile if an incompatible API + * change is made to some future version of ICU, at which point we + * might need to consider special treatment for different major + * version ranges, with intermediate trampoline functions. 
+ */ + lib->major_version = major_version; + lib->open = ucol_open; + lib->close = ucol_close; + lib->getVersion = ucol_getVersion; + lib->versionToString = u_versionToString; + lib->strcoll = ucol_strcoll; + lib->strcollUTF8 = ucol_strcollUTF8; + lib->getSortKey = ucol_getSortKey; + lib->nextSortKeyPart = ucol_nextSortKeyPart; + lib->setUTF8 = uiter_setUTF8; + lib->errorName = u_errorName; + lib->strToUpper = u_strToUpper; + lib->strToLower = u_strToLower; + lib->strToTitle = u_strToTitle; + + /* + * Also assert the size of a couple of types used as output buffers, + * as a canary to tell us to add extra padding in the (unlikely) event + * that a later release makes these values smaller. + */ + StaticAssertStmt(U_MAX_VERSION_STRING_LENGTH == 20, + "u_versionToString output buffer size changed incompatibly"); + StaticAssertStmt(U_MAX_VERSION_LENGTH == 4, + "ucol_getVersion output buffer size changed incompatibly"); + } + else + { + /* This is an older version, so we'll need to use dlopen(). */ +#ifdef HAVE_DLOPEN + char libicui18n_name[MAXPGPATH]; + char libicuuc_name[MAXPGPATH]; + + /* + * We don't like to open versions newer than what we're linked + * against, to reduce the risk of an API change biting us. + */ + if (major_version > U_ICU_VERSION_MAJOR_NUM) + elog(ERROR, "ICU major version %d higher than linked version %d, refusing to open", + major_version, U_ICU_VERSION_MAJOR_NUM); + + lib->major_version = major_version; + + /* + * XXX using pkglib_path to look for libraries. The idea is that an + * administrator would drop symlinks in there. + * + * Perhaps should do something like dfmgr.c's dynamic_library_path + * search to find it? + */ + + /* Load the collation library. */ + snprintf(libicui18n_name, + sizeof(libicui18n_name), + "%s/libicui18n" DLSUFFIX ".%d", + pkglib_path, + major_version); + lib->libicui18n_handle = load_icu_library(lib, libicui18n_name); + + /* Load the ctype library. 
*/ + snprintf(libicuuc_name, + sizeof(libicuuc_name), + "%s/libicuuc" DLSUFFIX ".%d", + pkglib_path, + major_version); + lib->libicuuc_handle = load_icu_library(lib, libicuuc_name); + + /* Look up all the functions we need. */ + lib->open = get_icu_function(lib->libicui18n_handle, + "ucol_open", + major_version); + lib->close = get_icu_function(lib->libicui18n_handle, + "ucol_close", + major_version); + lib->getVersion = get_icu_function(lib->libicui18n_handle, + "ucol_getVersion", + major_version); + lib->versionToString = get_icu_function(lib->libicui18n_handle, + "u_versionToString", + major_version); + lib->strcoll = get_icu_function(lib->libicui18n_handle, + "ucol_strcoll", + major_version); + lib->strcollUTF8 = get_icu_function(lib->libicui18n_handle, + "ucol_strcollUTF8", + major_version); + lib->getSortKey = get_icu_function(lib->libicui18n_handle, + "ucol_getSortKey", + major_version); + lib->nextSortKeyPart = get_icu_function(lib->libicui18n_handle, + "ucol_nextSortKeyPart", + major_version); + lib->setUTF8 = get_icu_function(lib->libicui18n_handle, + "uiter_setUTF8", + major_version); + lib->errorName = get_icu_function(lib->libicui18n_handle, + "u_errorName", + major_version); + lib->strToUpper = get_icu_function(lib->libicuuc_handle, + "u_strToUpper", + major_version); + lib->strToLower = get_icu_function(lib->libicuuc_handle, + "u_strToLower", + major_version); + lib->strToTitle = get_icu_function(lib->libicuuc_handle, + "u_strToTitle", + major_version); + if (!lib->open || + !lib->close || + !lib->getVersion || + !lib->versionToString || + !lib->strcoll || + !lib->strcollUTF8 || + !lib->getSortKey || + !lib->nextSortKeyPart || + !lib->setUTF8 || + !lib->errorName || + !lib->strToUpper || + !lib->strToLower || + !lib->strToTitle) + { + free_icu_library(lib); + ereport(ERROR, + (errmsg("could not find expected symbols in library \"%s\"", + libicui18n_name))); + } +#else + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("dynamically 
loaded ICU libraries are not supported in this build"))); +#endif + } + + lib->next = icu_library_list; + icu_library_list = lib; + + return lib; +} + +/* + * Look up the library to use for a given collcollate string. + */ +static pg_icu_library * +get_icu_library_for_collation(const char *collcollate, const char **rest) +{ + int major_version; + char *separator; + char *after_prefix; + + separator = strchr(collcollate, ':'); + + /* + * If it's a traditional value without a prefix, use the library we are + * linked against. + */ + if (separator == NULL) + { + *rest = collcollate; + return get_icu_library(U_ICU_VERSION_MAJOR_NUM); + } + + /* If it has a prefix, interpret it as an ICU major version. */ + major_version = strtol(collcollate, &after_prefix, 10); + if (after_prefix != separator) + elog(ERROR, + "could not parse ICU major library version: \"%s\"", + collcollate); + if (major_version < PG_MIN_ICU_MAJOR_VERSION || + major_version > PG_MAX_ICU_MAJOR_VERSION) + elog(ERROR, + "ICU major library version out of supported range: \"%s\"", + collcollate); + + /* The part after the separator will be passed to the library. */ + *rest = separator + 1; + + return get_icu_library(major_version); +} + +#endif + void make_icu_collator(const char *iculocstr, struct pg_locale_struct *resultp) { #ifdef USE_ICU + pg_icu_library *lib; UCollator *collator; UErrorCode status; + lib = get_icu_library_for_collation(iculocstr, &iculocstr); status = U_ZERO_ERROR; - collator = ucol_open(iculocstr, &status); + collator = lib->open(iculocstr, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not open collator for locale \"%s\": %s", - iculocstr, u_errorName(status)))); + iculocstr, lib->errorName(status)))); - if (U_ICU_VERSION_MAJOR_NUM < 54) + /* + * XXX can we just drop this cruft and make 54 the minimum supported + * version? 
+ */ + if (lib->major_version < 54) icu_set_collation_attributes(collator, iculocstr); /* We will leak this string if the caller errors later :-( */ resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr); resultp->info.icu.ucol = collator; + resultp->info.icu.lib = lib; #else /* not USE_ICU */ /* could get here if a collation was created by a build with ICU */ ereport(ERROR, @@ -1688,21 +2017,23 @@ get_collation_actual_version(char collprovider, const char *collcollate) #ifdef USE_ICU if (collprovider == COLLPROVIDER_ICU) { + pg_icu_library *lib; UCollator *collator; UErrorCode status; UVersionInfo versioninfo; char buf[U_MAX_VERSION_STRING_LENGTH]; + lib = get_icu_library_for_collation(collcollate, &collcollate); status = U_ZERO_ERROR; - collator = ucol_open(collcollate, &status); + collator = lib->open(collcollate, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not open collator for locale \"%s\": %s", - collcollate, u_errorName(status)))); - ucol_getVersion(collator, versioninfo); - ucol_close(collator); + collcollate, lib->errorName(status)))); + lib->getVersion(collator, versioninfo); + lib->close(collator); - u_versionToString(versioninfo, buf); + lib->versionToString(versioninfo, buf); collversion = pstrdup(buf); } else @@ -1770,6 +2101,8 @@ get_collation_actual_version(char collprovider, const char *collcollate) #ifdef USE_ICU + + /* * Converter object for converting between ICU's UChar strings and C strings * in database encoding. 
Since the database encoding doesn't change, we only @@ -1991,19 +2324,22 @@ void check_icu_locale(const char *icu_locale) { #ifdef USE_ICU + pg_icu_library *lib; UCollator *collator; UErrorCode status; + lib = get_icu_library_for_collation(icu_locale, &icu_locale); status = U_ZERO_ERROR; - collator = ucol_open(icu_locale, &status); + collator = lib->open(icu_locale, &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("could not open collator for locale \"%s\": %s", - icu_locale, u_errorName(status)))); + icu_locale, lib->errorName(status)))); - if (U_ICU_VERSION_MAJOR_NUM < 54) + /* XXX can we just drop this cruft? */ + if (lib->major_version < 54) icu_set_collation_attributes(collator, icu_locale); - ucol_close(collator); + lib->close(collator); #else ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c index bbeb0a2653..187f066881 100644 --- a/src/backend/utils/adt/varchar.c +++ b/src/backend/utils/adt/varchar.c @@ -1025,11 +1025,11 @@ hashbpchar(PG_FUNCTION_ARGS) ulen = icu_to_uchar(&uchar, keydata, keylen); - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, NULL, 0); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); + PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, buf, bsize); result = hash_any(buf, bsize); @@ -1086,11 +1086,11 @@ hashbpcharextended(PG_FUNCTION_ARGS) ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key)); - bsize = ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, NULL, 0); + bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, NULL, 0); buf = palloc(bsize); - ucol_getSortKey(mylocale->info.icu.ucol, - uchar, ulen, buf, bsize); + PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale), + uchar, ulen, buf, bsize); result = hash_any_extended(buf, 
bsize, PG_GETARG_INT64(1)); diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 919138eaf3..627fdeafa1 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -1666,13 +1666,14 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) UErrorCode status; status = U_ZERO_ERROR; - result = ucol_strcollUTF8(mylocale->info.icu.ucol, - arg1, len1, - arg2, len2, - &status); + result = PG_ICU_LIB(mylocale)->strcollUTF8(PG_ICU_COL(mylocale), + arg1, len1, + arg2, len2, + &status); if (U_FAILURE(status)) ereport(ERROR, - (errmsg("collation failed: %s", u_errorName(status)))); + (errmsg("collation failed: %s", + PG_ICU_LIB(mylocale)->errorName(status)))); } else #endif @@ -1685,9 +1686,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid) ulen1 = icu_to_uchar(&uchar1, arg1, len1); ulen2 = icu_to_uchar(&uchar2, arg2, len2); - result = ucol_strcoll(mylocale->info.icu.ucol, - uchar1, ulen1, - uchar2, ulen2); + result = PG_ICU_LIB(mylocale)->strcoll(PG_ICU_COL(mylocale), + uchar1, ulen1, + uchar2, ulen2); pfree(uchar1); pfree(uchar2); @@ -2389,13 +2390,14 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) UErrorCode status; status = U_ZERO_ERROR; - result = ucol_strcollUTF8(sss->locale->info.icu.ucol, - a1p, len1, - a2p, len2, - &status); + result = PG_ICU_LIB(sss->locale)->strcollUTF8(PG_ICU_COL(sss->locale), + a1p, len1, + a2p, len2, + &status); if (U_FAILURE(status)) ereport(ERROR, - (errmsg("collation failed: %s", u_errorName(status)))); + (errmsg("collation failed: %s", + PG_ICU_LIB(sss->locale)->errorName(status)))); } else #endif @@ -2408,9 +2410,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup) ulen1 = icu_to_uchar(&uchar1, a1p, len1); ulen2 = icu_to_uchar(&uchar2, a2p, len2); - result = ucol_strcoll(sss->locale->info.icu.ucol, - uchar1, ulen1, - uchar2, ulen2); + result = 
PG_ICU_LIB(sss->locale)->strcoll(PG_ICU_COL(sss->locale), + uchar1, ulen1, + uchar2, ulen2); pfree(uchar1); pfree(uchar2); @@ -2571,24 +2573,24 @@ varstr_abbrev_convert(Datum original, SortSupport ssup) uint32_t state[2]; UErrorCode status; - uiter_setUTF8(&iter, sss->buf1, len); + PG_ICU_LIB(sss->locale)->setUTF8(&iter, sss->buf1, len); state[0] = state[1] = 0; /* won't need that again */ status = U_ZERO_ERROR; - bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol, - &iter, - state, - (uint8_t *) sss->buf2, - Min(sizeof(Datum), sss->buflen2), - &status); + bsize = PG_ICU_LIB(sss->locale)->nextSortKeyPart(PG_ICU_COL(sss->locale), + &iter, + state, + (uint8_t *) sss->buf2, + Min(sizeof(Datum), sss->buflen2), + &status); if (U_FAILURE(status)) ereport(ERROR, (errmsg("sort key generation failed: %s", - u_errorName(status)))); + PG_ICU_LIB(sss->locale)->errorName(status)))); } else - bsize = ucol_getSortKey(sss->locale->info.icu.ucol, - uchar, ulen, - (uint8_t *) sss->buf2, sss->buflen2); + bsize = PG_ICU_LIB(sss->locale)->getSortKey(PG_ICU_COL(sss->locale), + uchar, ulen, + (uint8_t *) sss->buf2, sss->buflen2); } else #endif diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index e7385faef8..ca345d20af 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -17,6 +17,7 @@ #endif #ifdef USE_ICU #include <unicode/ucol.h> +#include <unicode/ubrk.h> #endif #include "utils/guc.h" @@ -74,6 +75,71 @@ extern struct lconv *PGLC_localeconv(void); extern void cache_locale_time(void); +#ifdef USE_ICU + +/* + * An ICU library version that we're either linked against or have loaded at + * runtime. 
+ */ +typedef struct pg_icu_library +{ + int major_version; + void *libicui18n_handle; + void *libicuuc_handle; + UCollator *(*open) (const char *loc, UErrorCode *status); + void (*close) (UCollator *coll); + void (*getVersion) (const UCollator *coll, UVersionInfo info); + void (*versionToString) (const UVersionInfo versionArray, + char *versionString); + UCollationResult(*strcoll) (const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength); + UCollationResult(*strcollUTF8) (const UCollator *coll, + const char *source, + int32_t sourceLength, + const char *target, + int32_t targetLength, + UErrorCode *status); + int32_t (*getSortKey) (const UCollator *coll, + const UChar *source, + int32_t sourceLength, + uint8_t *result, + int32_t resultLength); + int32_t (*nextSortKeyPart) (const UCollator *coll, + UCharIterator *iter, + uint32_t state[2], + uint8_t *dest, + int32_t count, + UErrorCode *status); + void (*setUTF8) (UCharIterator *iter, + const char *s, + int32_t length); + const char *(*errorName) (UErrorCode code); + int32_t (*strToUpper) (UChar *dest, + int32_t destCapacity, + const UChar *src, + int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + int32_t (*strToLower) (UChar *dest, + int32_t destCapacity, + const UChar *src, + int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + int32_t (*strToTitle) (UChar *dest, + int32_t destCapacity, + const UChar *src, + int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode); + struct pg_icu_library *next; +} pg_icu_library; + +#endif /* * We define our own wrapper around locale_t so we can keep the same @@ -95,12 +161,18 @@ struct pg_locale_struct { const char *locale; UCollator *ucol; + pg_icu_library *lib; } icu; #endif int dummy; /* in case we have neither LOCALE_T nor ICU */ } info; }; +#ifdef USE_ICU +#define PG_ICU_LIB(x) ((x)->info.icu.lib) +#define PG_ICU_COL(x) ((x)->info.icu.ucol) 
+#endif + typedef struct pg_locale_struct *pg_locale_t; extern PGDLLIMPORT struct pg_locale_struct default_locale; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 4fb746930a..47aba7654b 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1100,6 +1100,7 @@ HeapTupleTableSlot HistControl HotStandbyState I32 +ICU_Convert_BI_Func ICU_Convert_Func ID INFIX @@ -2858,6 +2859,7 @@ TypeName U U32 U8 +UBreakIterator UChar UCharIterator UColAttribute @@ -3485,6 +3487,7 @@ pg_funcptr_t pg_gssinfo pg_hmac_ctx pg_hmac_errno +pg_icu_library pg_int64 pg_local_to_utf_combined pg_locale_t -- 2.30.2