On Sun, Jun 12, 2022 at 11:59 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
>
> On Sat, Jun 11, 2022 at 4:21 PM Peter Geoghegan <p...@bowt.ie> wrote:
> > What about "time travel collations", but without the time travel part?
> > That is, what about supporting multiple ICU versions per cluster, but
> > not per database? So you could upgrade the OS and Postgres, using
> > standard packages that typically just use the latest ICU version --
> > typically, but not always. If you happen to have been on an older
> > version of ICU on upgrade, then that version of ICU will still work at
> > the level of a whole database -- your database. Maybe you can create
> > new databases with old and new ICU versions if you want to.
> >
> > That obviously runs into the problem of needing to eventually do a
> > dump and reload -- but I suppose that "eventually" could be a very
> > long time. At least the OS package doesn't declare one version of ICU
> > the blessed version, now and forever, effectively vendoring ICU in a
> > backdoor fashion. At least old databases have significant runway,
> > while at the same time new databases that want to use the same
> > standard Postgres package aren't forced to use the same ancient ICU
> > version.
>
> Hmm.  I think that's effectively what you'd get using my "distinct
> collation" patch (v1, or this much better v3, attached), if you put
> version prefixes in colliculocale, and updated them in the template
> database after an OS upgrade to affect new databases.  I realise you
> probably mean something a little more automatic...

Thinking some more about what you said above: really, most people only
care about the default collation.  I'm not yet sure what I think
initdb should put into pg_collation when importing the initial set of
collation objects in the "distinct" world (perhaps an un-prefixed and
a prefixed variant of each, with names ending -x-icu and -x-icu63?),
but as for the default collation, I should point out that the
"distinct" patch already gives you a nailed-to-the-ground database
approximately as you described above if you just do something like
this:

postgres=# create database db2 locale_provider = icu icu_locale =
'67:en' template = template0 ...;

Small bugfix attached (v3 was accidentally calling uiter_setUTF8() and
u_errorName() directly in a couple of places).
From 265d6ea0d292255c164015814f4e810186d2ffb3 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Wed, 8 Jun 2022 17:43:53 +1200
Subject: [PATCH v4] WIP: multi ICU, distinct collations

XXX This is highly experimental code
---
 src/backend/access/hash/hashfunc.c |  16 +-
 src/backend/utils/adt/formatting.c |  53 ++++-
 src/backend/utils/adt/pg_locale.c  | 360 ++++++++++++++++++++++++++++-
 src/backend/utils/adt/varchar.c    |  16 +-
 src/backend/utils/adt/varlena.c    |  56 ++---
 src/include/utils/pg_locale.h      |  72 ++++++
 src/tools/pgindent/typedefs.list   |   3 +
 7 files changed, 510 insertions(+), 66 deletions(-)

diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index b57ed946c4..0a61538efd 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -298,11 +298,11 @@ hashtext(PG_FUNCTION_ARGS)
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+													 uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+											 uchar, ulen, buf, bsize);
 
 			result = hash_any(buf, bsize);
 
@@ -355,11 +355,11 @@ hashtextextended(PG_FUNCTION_ARGS)
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+													 uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+											 uchar, ulen, buf, bsize);
 
 			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
 
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index e909c1a200..97c96cb7f6 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1602,6 +1602,11 @@ typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
 									 const UChar *src, int32_t srcLength,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
+typedef int32_t (*ICU_Convert_BI_Func) (UChar *dest, int32_t destCapacity,
+										const UChar *src, int32_t srcLength,
+										UBreakIterator *bi,
+										const char *locale,
+										UErrorCode *pErrorCode);
 
 static int32_t
 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
@@ -1626,18 +1631,41 @@ icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
 	}
 	if (U_FAILURE(status))
 		ereport(ERROR,
-				(errmsg("case conversion failed: %s", u_errorName(status))));
+				(errmsg("case conversion failed: %s",
+						PG_ICU_LIB(mylocale)->errorName(status))));
 	return len_dest;
 }
 
+/*
+ * Like icu_convert_case, but for ICU functions such as u_strToTitle that
+ * also take a break iterator argument; we always pass NULL for it.
+ */
 static int32_t
-u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
-						const UChar *src, int32_t srcLength,
-						const char *locale,
-						UErrorCode *pErrorCode)
+icu_convert_case_bi(ICU_Convert_BI_Func func, pg_locale_t mylocale,
+					UChar **buff_dest, UChar *buff_source, int32_t len_source)
 {
-	return u_strToTitle(dest, destCapacity, src, srcLength,
-						NULL, locale, pErrorCode);
+	UErrorCode	status;
+	int32_t		len_dest;
+
+	len_dest = len_source;		/* try first with same length */
+	*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+	status = U_ZERO_ERROR;
+	len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL,
+					mylocale->info.icu.locale, &status);
+	if (status == U_BUFFER_OVERFLOW_ERROR)
+	{
+		/* try again with adjusted length */
+		pfree(*buff_dest);
+		*buff_dest = palloc(len_dest * sizeof(**buff_dest));
+		status = U_ZERO_ERROR;
+		len_dest = func(*buff_dest, len_dest, buff_source, len_source, NULL,
+						mylocale->info.icu.locale, &status);
+	}
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("case conversion failed: %s",
+						PG_ICU_LIB(mylocale)->errorName(status))));
+	return len_dest;
 }
 
 #endif							/* USE_ICU */
@@ -1705,7 +1733,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 			UChar	   *buff_conv;
 
 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToLower, mylocale,
+			len_conv = icu_convert_case(PG_ICU_LIB(mylocale)->strToLower,
+										mylocale,
 										&buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
 			pfree(buff_uchar);
@@ -1827,7 +1856,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 			UChar	   *buff_conv;
 
 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToUpper, mylocale,
+			len_conv = icu_convert_case(PG_ICU_LIB(mylocale)->strToUpper,
+										mylocale,
 										&buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
 			pfree(buff_uchar);
@@ -1950,8 +1980,9 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 			UChar	   *buff_conv;
 
 			len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
-			len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
-										&buff_conv, buff_uchar, len_uchar);
+			len_conv = icu_convert_case_bi(PG_ICU_LIB(mylocale)->strToTitle,
+										   mylocale,
+										   &buff_conv, buff_uchar, len_uchar);
 			icu_from_uchar(&result, buff_conv, len_conv);
 			pfree(buff_uchar);
 			pfree(buff_conv);
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a0490a7522..8dec0ff8c9 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -58,6 +58,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_control.h"
 #include "mb/pg_wchar.h"
+#include "miscadmin.h"
 #include "utils/builtins.h"
 #include "utils/formatting.h"
 #include "utils/hsearch.h"
@@ -68,6 +69,7 @@
 
 #ifdef USE_ICU
 #include <unicode/ucnv.h>
+#include <unicode/ustring.h>
 #endif
 
 #ifdef __GLIBC__
@@ -78,8 +80,27 @@
 #include <shlwapi.h>
 #endif
 
+#ifdef HAVE_DLOPEN
+#include <dlfcn.h>
+#endif
+
 #define		MAX_L10N_DATA		80
 
+#ifdef USE_ICU
+
+/*
+ * We don't want to call into dlopen'd ICU libraries that are newer than the
+ * one we were compiled and linked against, just in case there is an
+ * incompatible API change.
+ */
+#define PG_MAX_ICU_MAJOR_VERSION U_ICU_VERSION_MAJOR_NUM
+
+/* An old ICU release that we know has the right API. */
+/* XXX check if this is actually true, and if we could go back further */
+#define PG_MIN_ICU_MAJOR_VERSION 54
+
+#endif
+
 
 /* GUC settings */
 char	   *locale_messages;
@@ -1435,29 +1456,337 @@ lc_ctype_is_c(Oid collation)
 	return (lookup_collation_cache(collation, true))->ctype_is_c;
 }
 
+#ifdef USE_ICU
+
 struct pg_locale_struct default_locale;
 
+/* Linked list of ICU libraries we have loaded. */
+static pg_icu_library *icu_library_list = NULL;
+
+/*
+ * Clean up a pg_icu_library whose initialization failed: close any library
+ * handles it holds, then free it.  (Successfully built ones are kept.)
+ */
+static void
+free_icu_library(pg_icu_library *lib)
+{
+	void	   *handles[] = {lib->libicui18n_handle, lib->libicuuc_handle};
+
+	for (int i = 0; i < 2; i++)
+		if (handles[i] != NULL)
+			dlclose(handles[i]);
+	pfree(lib);
+}
+
+/* Look up ICU's versioned symbol "<function>_<version>" in a loaded library. */
+static void *
+get_icu_function(void *handle, const char *function, int version)
+{
+	char		symbol[80];
+
+	snprintf(symbol, sizeof(symbol), "%s_%d", function, version);
+	return dlsym(handle, symbol);
+}
+
+/*
+ * Identify the ICU major version of a dlopen'd library via its symbol names.
+ */
+static int
+get_icu_library_major_version(void *handle)
+{
+	int			version;
+
+	for (version = PG_MIN_ICU_MAJOR_VERSION; version <= PG_MAX_ICU_MAJOR_VERSION; version++)
+		if (get_icu_function(handle, "ucol_open", version) != NULL ||
+			get_icu_function(handle, "u_strToUpper", version) != NULL)
+			return version;
+
+	/*
+	 * Too new to trust, too old, built without symbol suffixes, or not ICU.
+	 */
+	return -1;
+}
+
+/*
+ * dlopen() the ICU shared library "name" for "lib" and verify that it holds
+ * lib->major_version.  On any failure, frees "lib" and raises an ERROR.
+ */
+static void *
+load_icu_library(pg_icu_library *lib, const char *name)
+{
+	void	   *handle;
+	int			expected_major_version = lib->major_version;
+	int			found_major_version;
+
+	handle = dlopen(name, RTLD_NOW | RTLD_GLOBAL);
+	if (handle == NULL)
+	{
+		/* dlerror(), not errno, has the reason; copy it before cleanup */
+		const char *reason = dlerror();
+		char	   *detail = pstrdup(reason ? reason : "unknown error");
+
+		free_icu_library(lib);
+		ereport(ERROR,
+				(errmsg("could not load library \"%s\": %s", name, detail)));
+	}
+
+	found_major_version = get_icu_library_major_version(handle);
+	if (found_major_version != expected_major_version)
+	{
+		/* "lib" doesn't own "handle" yet, so close it here; then drop "lib". */
+		dlclose(handle);
+		free_icu_library(lib);	/* "lib" must not be dereferenced after this */
+		if (found_major_version < 0)
+			ereport(ERROR,
+					(errmsg("could not find compatible ICU major version in library \"%s\"",
+							name)));
+		else
+			ereport(ERROR,
+					(errmsg("expected to find ICU major version %d in library \"%s\", but found %d",
+							expected_major_version, name, found_major_version)));
+	}
+
+	return handle;
+}
+
+/*
+ * Given an ICU major version number, return the (cached) object we need to
+ * access it, or raise an ERROR, freeing any partial entry, if loading fails.
+ */
+static pg_icu_library *
+get_icu_library(int major_version)
+{
+	pg_icu_library *lib;
+
+	Assert(major_version >= PG_MIN_ICU_MAJOR_VERSION &&
+		   major_version <= PG_MAX_ICU_MAJOR_VERSION);
+
+	/* Try to find it in our list of existing libraries. */
+	for (lib = icu_library_list; lib; lib = lib->next)
+		if (lib->major_version == major_version)
+			return lib;
+
+	/* Make a new, zeroed entry.  It isn't linked into the list until the end. */
+	lib = MemoryContextAllocZero(TopMemoryContext, sizeof(*lib));
+	lib->major_version = major_version;
+	if (major_version == U_ICU_VERSION_MAJOR_NUM)
+	{
+		/*
+		 * This is the version we were compiled and linked against.  Simply
+		 * assign the function pointers.
+		 *
+		 * These assignments will fail to compile if an incompatible API
+		 * change is made to some future version of ICU, at which point we
+		 * might need to consider special treatment for different major
+		 * version ranges, with intermediate trampoline functions.
+		 */
+		lib->open = ucol_open;
+		lib->close = ucol_close;
+		lib->getVersion = ucol_getVersion;
+		lib->versionToString = u_versionToString;
+		lib->strcoll = ucol_strcoll;
+		lib->strcollUTF8 = ucol_strcollUTF8;
+		lib->getSortKey = ucol_getSortKey;
+		lib->nextSortKeyPart = ucol_nextSortKeyPart;
+		lib->setUTF8 = uiter_setUTF8;
+		lib->errorName = u_errorName;
+		lib->strToUpper = u_strToUpper;
+		lib->strToLower = u_strToLower;
+		lib->strToTitle = u_strToTitle;
+
+		/*
+		 * Also assert the size of a couple of types used as output buffers,
+		 * as a canary to tell us to add extra padding in the (unlikely) event
+		 * that a later release makes these values smaller.
+		 */
+		StaticAssertStmt(U_MAX_VERSION_STRING_LENGTH == 20,
+						 "u_versionToString output buffer size changed incompatibly");
+		StaticAssertStmt(U_MAX_VERSION_LENGTH == 4,
+						 "ucol_getVersion output buffer size changed incompatibly");
+	}
+	else
+	{
+		/* This is an older version, so we'll need to use dlopen(). */
+#ifdef HAVE_DLOPEN
+		char		libicui18n_name[MAXPGPATH];
+		char		libicuuc_name[MAXPGPATH];
+
+		/*
+		 * We don't like to open versions newer than what we're linked
+		 * against, to reduce the risk of an API change biting us.
+		 */
+		if (major_version > U_ICU_VERSION_MAJOR_NUM)
+		{
+			free_icu_library(lib);	/* not in the list yet, so don't leak it */
+			elog(ERROR, "ICU major version %d higher than linked version %d, refusing to open",
+				 major_version, U_ICU_VERSION_MAJOR_NUM);
+		}
+
+		/*
+		 * XXX We look in pkglib_path, expecting an administrator to drop
+		 * symlinks there.  Perhaps we should search like dfmgr.c does with
+		 * dynamic_library_path instead?
+		 */
+
+		/* Load the collation library. */
+		snprintf(libicui18n_name,
+				 sizeof(libicui18n_name),
+				 "%s/libicui18n" DLSUFFIX ".%d",
+				 pkglib_path,
+				 major_version);
+		lib->libicui18n_handle = load_icu_library(lib, libicui18n_name);
+
+		/* Load the ctype library. */
+		snprintf(libicuuc_name,
+				 sizeof(libicuuc_name),
+				 "%s/libicuuc" DLSUFFIX ".%d",
+				 pkglib_path,
+				 major_version);
+		lib->libicuuc_handle = load_icu_library(lib, libicuuc_name);
+
+		/* Look up all the functions we need. */
+		lib->open = get_icu_function(lib->libicui18n_handle,
+									 "ucol_open",
+									 major_version);
+		lib->close = get_icu_function(lib->libicui18n_handle,
+									  "ucol_close",
+									  major_version);
+		lib->getVersion = get_icu_function(lib->libicui18n_handle,
+										   "ucol_getVersion",
+										   major_version);
+		lib->versionToString = get_icu_function(lib->libicui18n_handle,
+												"u_versionToString",
+												major_version);
+		lib->strcoll = get_icu_function(lib->libicui18n_handle,
+										"ucol_strcoll",
+										major_version);
+		lib->strcollUTF8 = get_icu_function(lib->libicui18n_handle,
+											"ucol_strcollUTF8",
+											major_version);
+		lib->getSortKey = get_icu_function(lib->libicui18n_handle,
+										   "ucol_getSortKey",
+										   major_version);
+		lib->nextSortKeyPart = get_icu_function(lib->libicui18n_handle,
+												"ucol_nextSortKeyPart",
+												major_version);
+		lib->setUTF8 = get_icu_function(lib->libicui18n_handle,
+										"uiter_setUTF8",
+										major_version);
+		lib->errorName = get_icu_function(lib->libicui18n_handle,
+										  "u_errorName",
+										  major_version);
+		lib->strToUpper = get_icu_function(lib->libicuuc_handle,
+										   "u_strToUpper",
+										   major_version);
+		lib->strToLower = get_icu_function(lib->libicuuc_handle,
+										   "u_strToLower",
+										   major_version);
+		lib->strToTitle = get_icu_function(lib->libicuuc_handle,
+										   "u_strToTitle",
+										   major_version);
+		if (!lib->open ||
+			!lib->close ||
+			!lib->getVersion ||
+			!lib->versionToString ||
+			!lib->strcoll ||
+			!lib->strcollUTF8 ||
+			!lib->getSortKey ||
+			!lib->nextSortKeyPart ||
+			!lib->setUTF8 ||
+			!lib->errorName ||
+			!lib->strToUpper ||
+			!lib->strToLower ||
+			!lib->strToTitle)
+		{
+			free_icu_library(lib);
+			ereport(ERROR,
+					(errmsg("could not find expected symbols in library \"%s\"",
+							libicui18n_name)));
+		}
+#else
+		free_icu_library(lib);	/* not in the list yet, so don't leak it */
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("dynamically loaded ICU libraries are not supported in this build")));
+#endif
+	}
+
+	lib->next = icu_library_list;
+	icu_library_list = lib;
+
+	return lib;
+}
+
+/*
+ * Look up the library for a collcollate string: either a plain ICU locale, or
+ * "<major version>:<locale>".  *rest is set to the locale part to give ICU.
+ */
+static pg_icu_library *
+get_icu_library_for_collation(const char *collcollate, const char **rest)
+{
+	long		major_version;
+	char	   *separator;
+	char	   *after_prefix;
+
+	separator = strchr(collcollate, ':');
+
+	/* A traditional value with no prefix uses the library we're linked with. */
+	if (separator == NULL)
+	{
+		*rest = collcollate;
+		return get_icu_library(U_ICU_VERSION_MAJOR_NUM);
+	}
+
+	/*
+	 * Parse the version prefix.  It stays a long until range-checked, so a
+	 * huge value can't wrap to a valid-looking version when narrowed to int.
+	 */
+	major_version = strtol(collcollate, &after_prefix, 10);
+	if (after_prefix != separator)
+		elog(ERROR,
+			 "could not parse ICU major library version: \"%s\"",
+			 collcollate);
+	if (major_version < PG_MIN_ICU_MAJOR_VERSION ||
+		major_version > PG_MAX_ICU_MAJOR_VERSION)
+		elog(ERROR,
+			 "ICU major library version out of supported range: \"%s\"",
+			 collcollate);
+
+	/* The part after the separator will be passed to the library. */
+	*rest = separator + 1;
+	return get_icu_library((int) major_version);
+}
+
+#endif
+
 void
 make_icu_collator(const char *iculocstr,
 				  struct pg_locale_struct *resultp)
 {
 #ifdef USE_ICU
+	pg_icu_library *lib;
 	UCollator  *collator;
 	UErrorCode	status;
 
+	lib = get_icu_library_for_collation(iculocstr, &iculocstr);
 	status = U_ZERO_ERROR;
-	collator = ucol_open(iculocstr, &status);
+	collator = lib->open(iculocstr, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						iculocstr, u_errorName(status))));
+						iculocstr, lib->errorName(status))));
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
+	/*
+	 * XXX can we just drop this cruft and make 54 the minimum supported
+	 * version?
+	 */
+	if (lib->major_version < 54)
 		icu_set_collation_attributes(collator, iculocstr);
 
 	/* We will leak this string if the caller errors later :-( */
 	resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
 	resultp->info.icu.ucol = collator;
+	resultp->info.icu.lib = lib;
 #else							/* not USE_ICU */
 	/* could get here if a collation was created by a build with ICU */
 	ereport(ERROR,
@@ -1688,21 +2017,23 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 #ifdef USE_ICU
 	if (collprovider == COLLPROVIDER_ICU)
 	{
+		pg_icu_library *lib;
 		UCollator  *collator;
 		UErrorCode	status;
 		UVersionInfo versioninfo;
 		char		buf[U_MAX_VERSION_STRING_LENGTH];
 
+		lib = get_icu_library_for_collation(collcollate, &collcollate);
 		status = U_ZERO_ERROR;
-		collator = ucol_open(collcollate, &status);
+		collator = lib->open(collcollate, &status);
 		if (U_FAILURE(status))
 			ereport(ERROR,
 					(errmsg("could not open collator for locale \"%s\": %s",
-							collcollate, u_errorName(status))));
-		ucol_getVersion(collator, versioninfo);
-		ucol_close(collator);
+							collcollate, lib->errorName(status))));
+		lib->getVersion(collator, versioninfo);
+		lib->close(collator);
 
-		u_versionToString(versioninfo, buf);
+		lib->versionToString(versioninfo, buf);
 		collversion = pstrdup(buf);
 	}
 	else
@@ -1770,6 +2101,8 @@ get_collation_actual_version(char collprovider, const char *collcollate)
 
 
 #ifdef USE_ICU
+
+
 /*
  * Converter object for converting between ICU's UChar strings and C strings
  * in database encoding.  Since the database encoding doesn't change, we only
@@ -1991,19 +2324,22 @@ void
 check_icu_locale(const char *icu_locale)
 {
 #ifdef USE_ICU
+	pg_icu_library *lib;
 	UCollator  *collator;
 	UErrorCode	status;
 
+	lib = get_icu_library_for_collation(icu_locale, &icu_locale);
 	status = U_ZERO_ERROR;
-	collator = ucol_open(icu_locale, &status);
+	collator = lib->open(icu_locale, &status);
 	if (U_FAILURE(status))
 		ereport(ERROR,
 				(errmsg("could not open collator for locale \"%s\": %s",
-						icu_locale, u_errorName(status))));
+						icu_locale, lib->errorName(status))));
 
-	if (U_ICU_VERSION_MAJOR_NUM < 54)
+	/* XXX can we just drop this cruft? */
+	if (lib->major_version < 54)
 		icu_set_collation_attributes(collator, icu_locale);
-	ucol_close(collator);
+	lib->close(collator);
 #else
 	ereport(ERROR,
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index bbeb0a2653..187f066881 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -1025,11 +1025,11 @@ hashbpchar(PG_FUNCTION_ARGS)
 
 			ulen = icu_to_uchar(&uchar, keydata, keylen);
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+													 uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+											 uchar, ulen, buf, bsize);
 
 			result = hash_any(buf, bsize);
 
@@ -1086,11 +1086,11 @@ hashbpcharextended(PG_FUNCTION_ARGS)
 
 			ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
 
-			bsize = ucol_getSortKey(mylocale->info.icu.ucol,
-									uchar, ulen, NULL, 0);
+			bsize = PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+													 uchar, ulen, NULL, 0);
 			buf = palloc(bsize);
-			ucol_getSortKey(mylocale->info.icu.ucol,
-							uchar, ulen, buf, bsize);
+			PG_ICU_LIB(mylocale)->getSortKey(PG_ICU_COL(mylocale),
+											 uchar, ulen, buf, bsize);
 
 			result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
 
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 919138eaf3..627fdeafa1 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -1666,13 +1666,14 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 					UErrorCode	status;
 
 					status = U_ZERO_ERROR;
-					result = ucol_strcollUTF8(mylocale->info.icu.ucol,
-											  arg1, len1,
-											  arg2, len2,
-											  &status);
+					result = PG_ICU_LIB(mylocale)->strcollUTF8(PG_ICU_COL(mylocale),
+															   arg1, len1,
+															   arg2, len2,
+															   &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
-								(errmsg("collation failed: %s", u_errorName(status))));
+								(errmsg("collation failed: %s",
+										PG_ICU_LIB(mylocale)->errorName(status))));
 				}
 				else
 #endif
@@ -1685,9 +1686,9 @@ varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
 					ulen1 = icu_to_uchar(&uchar1, arg1, len1);
 					ulen2 = icu_to_uchar(&uchar2, arg2, len2);
 
-					result = ucol_strcoll(mylocale->info.icu.ucol,
-										  uchar1, ulen1,
-										  uchar2, ulen2);
+					result = PG_ICU_LIB(mylocale)->strcoll(PG_ICU_COL(mylocale),
+														   uchar1, ulen1,
+														   uchar2, ulen2);
 
 					pfree(uchar1);
 					pfree(uchar2);
@@ -2389,13 +2390,14 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 				UErrorCode	status;
 
 				status = U_ZERO_ERROR;
-				result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
-										  a1p, len1,
-										  a2p, len2,
-										  &status);
+				result = PG_ICU_LIB(sss->locale)->strcollUTF8(PG_ICU_COL(sss->locale),
+															  a1p, len1,
+															  a2p, len2,
+															  &status);
 				if (U_FAILURE(status))
 					ereport(ERROR,
-							(errmsg("collation failed: %s", u_errorName(status))));
+							(errmsg("collation failed: %s",
+									PG_ICU_LIB(sss->locale)->errorName(status))));
 			}
 			else
 #endif
@@ -2408,9 +2410,9 @@ varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
 				ulen1 = icu_to_uchar(&uchar1, a1p, len1);
 				ulen2 = icu_to_uchar(&uchar2, a2p, len2);
 
-				result = ucol_strcoll(sss->locale->info.icu.ucol,
-									  uchar1, ulen1,
-									  uchar2, ulen2);
+				result = PG_ICU_LIB(sss->locale)->strcoll(PG_ICU_COL(sss->locale),
+														  uchar1, ulen1,
+														  uchar2, ulen2);
 
 				pfree(uchar1);
 				pfree(uchar2);
@@ -2571,24 +2573,24 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
 					uint32_t	state[2];
 					UErrorCode	status;
 
-					uiter_setUTF8(&iter, sss->buf1, len);
+					PG_ICU_LIB(sss->locale)->setUTF8(&iter, sss->buf1, len);
 					state[0] = state[1] = 0;	/* won't need that again */
 					status = U_ZERO_ERROR;
-					bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
-												 &iter,
-												 state,
-												 (uint8_t *) sss->buf2,
-												 Min(sizeof(Datum), sss->buflen2),
-												 &status);
+					bsize = PG_ICU_LIB(sss->locale)->nextSortKeyPart(PG_ICU_COL(sss->locale),
+																	 &iter,
+																	 state,
+																	 (uint8_t *) sss->buf2,
+																	 Min(sizeof(Datum), sss->buflen2),
+																	 &status);
 					if (U_FAILURE(status))
 						ereport(ERROR,
 								(errmsg("sort key generation failed: %s",
-										u_errorName(status))));
+										PG_ICU_LIB(sss->locale)->errorName(status))));
 				}
 				else
-					bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
-											uchar, ulen,
-											(uint8_t *) sss->buf2, sss->buflen2);
+					bsize = PG_ICU_LIB(sss->locale)->getSortKey(PG_ICU_COL(sss->locale),
+																uchar, ulen,
+																(uint8_t *) sss->buf2, sss->buflen2);
 			}
 			else
 #endif
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index e7385faef8..ca345d20af 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -17,6 +17,7 @@
 #endif
 #ifdef USE_ICU
 #include <unicode/ucol.h>
+#include <unicode/ubrk.h>
 #endif
 
 #include "utils/guc.h"
@@ -74,6 +75,71 @@ extern struct lconv *PGLC_localeconv(void);
 
 extern void cache_locale_time(void);
 
+#ifdef USE_ICU
+
+/*
+ * An ICU library version that we're either linked against or have loaded at
+ * runtime, along with pointers to the functions we call in that version.
+ */
+typedef struct pg_icu_library
+{
+	int			major_version;	/* ICU major version this entry provides */
+	void	   *libicui18n_handle;	/* dlopen() handle; NULL if linked in */
+	void	   *libicuuc_handle;	/* dlopen() handle; NULL if linked in */
+	UCollator  *(*open) (const char *loc, UErrorCode *status);	/* ucol_open */
+	void		(*close) (UCollator *coll);	/* ucol_close */
+	void		(*getVersion) (const UCollator *coll, UVersionInfo info);	/* ucol_getVersion */
+	void		(*versionToString) (const UVersionInfo versionArray,
+									char *versionString);	/* u_versionToString */
+				UCollationResult(*strcoll) (const UCollator *coll,
+											const UChar *source,
+											int32_t sourceLength,
+											const UChar *target,
+											int32_t targetLength);	/* ucol_strcoll */
+				UCollationResult(*strcollUTF8) (const UCollator *coll,
+												const char *source,
+												int32_t sourceLength,
+												const char *target,
+												int32_t targetLength,
+												UErrorCode *status);	/* ucol_strcollUTF8 */
+	int32_t		(*getSortKey) (const UCollator *coll,
+							   const UChar *source,
+							   int32_t sourceLength,
+							   uint8_t *result,
+							   int32_t resultLength);	/* ucol_getSortKey */
+	int32_t		(*nextSortKeyPart) (const UCollator *coll,
+									UCharIterator *iter,
+									uint32_t state[2],
+									uint8_t *dest,
+									int32_t count,
+									UErrorCode *status);	/* ucol_nextSortKeyPart */
+	void		(*setUTF8) (UCharIterator *iter,
+							const char *s,
+							int32_t length);	/* uiter_setUTF8 */
+	const char *(*errorName) (UErrorCode code);	/* u_errorName */
+	int32_t		(*strToUpper) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   const char *locale,
+							   UErrorCode *pErrorCode);	/* u_strToUpper */
+	int32_t		(*strToLower) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   const char *locale,
+							   UErrorCode *pErrorCode);	/* u_strToLower */
+	int32_t		(*strToTitle) (UChar *dest,
+							   int32_t destCapacity,
+							   const UChar *src,
+							   int32_t srcLength,
+							   UBreakIterator *titleIter,
+							   const char *locale,
+							   UErrorCode *pErrorCode);	/* u_strToTitle */
+	struct pg_icu_library *next;	/* next entry in the list of loaded libraries */
+} pg_icu_library;
+
+#endif
 
 /*
  * We define our own wrapper around locale_t so we can keep the same
@@ -95,12 +161,18 @@ struct pg_locale_struct
 		{
 			const char *locale;
 			UCollator  *ucol;
+			pg_icu_library *lib;
 		}			icu;
 #endif
 		int			dummy;		/* in case we have neither LOCALE_T nor ICU */
 	}			info;
 };
 
+#ifdef USE_ICU
+#define PG_ICU_LIB(x) ((x)->info.icu.lib)	/* pg_icu_library of pg_locale_t x */
+#define PG_ICU_COL(x) ((x)->info.icu.ucol)	/* UCollator of pg_locale_t x */
+#endif
+
 typedef struct pg_locale_struct *pg_locale_t;
 
 extern PGDLLIMPORT struct pg_locale_struct default_locale;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 4fb746930a..47aba7654b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1100,6 +1100,7 @@ HeapTupleTableSlot
 HistControl
 HotStandbyState
 I32
+ICU_Convert_BI_Func
 ICU_Convert_Func
 ID
 INFIX
@@ -2858,6 +2859,7 @@ TypeName
 U
 U32
 U8
+UBreakIterator
 UChar
 UCharIterator
 UColAttribute
@@ -3485,6 +3487,7 @@ pg_funcptr_t
 pg_gssinfo
 pg_hmac_ctx
 pg_hmac_errno
+pg_icu_library
 pg_int64
 pg_local_to_utf_combined
 pg_locale_t
-- 
2.30.2

Reply via email to