On 12/13/2011 11:32 AM, Bruno Haible wrote:
> I would love to have locale_charset be either faster or use some thread-safe
> cache. Do you have an idea how to realize this?
For quite some time I have been using the following locale_charset
optimization, which uses binary search for locale alias resolution.
--
Alexander.
diff --git a/lib/localcharset.c b/lib/localcharset.c
index bfd9a03..fd677cf 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -113,19 +113,31 @@
#if __STDC__ != 1
# define volatile /* empty */
#endif
-/* Pointer to the contents of the charset.alias file, if it has already been
- read, else NULL. Its format is:
- ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */
-static const char * volatile charset_aliases;
+/* Array of pointers to the alias entries, sorted by alias name with strcmp.
+ Each entry has the form: ALIAS '\0' CANONICAL '\0' */
+static const char ** volatile charset_aliases;
+/* Number of entries in charset_aliases; -1 until the aliases have been read. */
+static int volatile charset_aliases_count = -1;
+/* Fallback charset name: the CANONICAL part of the "*" entry, if there is one. */
+static const char * volatile charset_fallback;
+/* qsort/bsearch comparator: A and B point to 'const char *' elements, ordered with strcmp. */
+static int
+charset_alias_cmp(const void *a,const void *b)
+{
+ return strcmp (*(const char**)a, *(const char**)b);
+}
-/* Return a pointer to the contents of the charset.alias file. */
-static const char *
+/* Reads the contents of the charset.alias file. */
+static void
get_charset_aliases (void)
{
const char *cp;
+ const char **cps = 0;
+ int cnt = 0;
+ const char *fb = 0;
+ const char *aliases;
- cp = charset_aliases;
- if (cp == NULL)
+ if (charset_aliases_count < 0)
{
#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__)
const char *dir;
@@ -343,10 +355,67 @@ get_charset_aliases (void)
# endif
#endif
- charset_aliases = cp;
+ /* build ordered array of aliases and also find fallback */
+ cnt = 0;
+ for (aliases = cp; *aliases != '\0';
+ aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+ {
+ if (aliases[0] == '*' && aliases[1] == '\0')
+ fb = aliases + strlen (aliases) + 1;
+ else
+ cnt ++;
+ }
+
+ if (cnt > 0)
+ {
+ cps = (const char **) malloc (cnt * sizeof(*cps));
+ if (cps == NULL)
+ {
+ /* out of memory */
+ charset_aliases_count = 0;
+ return;
+ }
+
+ cnt = 0;
+ for (aliases = cp; *aliases != '\0';
+ aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
+ {
+ if (!(aliases[0] == '*' && aliases[1] == '\0'))
+ cps[cnt++] = aliases;
+ }
+
+ qsort (cps, cnt, sizeof(*cps), charset_alias_cmp);
+ }
+
+ charset_aliases = cps;
+ charset_fallback = fb;
+ charset_aliases_count = cnt;
}
+}
+
+/* Map CODESET through the alias table: return the canonical name of an exact
+ alias match, else the "*" fallback if one was read, else CODESET itself.
+ The result must not be freed by the caller. */
+
+static const char *
+resolve_alias (const char *codeset)
+{
+ const char **alias_ptr = 0;
+
+ get_charset_aliases ();
+ /* The key is passed as &codeset because the comparator dereferences both of its arguments. */
+ if (charset_aliases_count > 0)
+ alias_ptr = bsearch (&codeset, charset_aliases,
+ charset_aliases_count, sizeof(codeset),
+ charset_alias_cmp);
+ /* The canonical name is stored right after the alias's terminating '\0'. */
+ if (alias_ptr != NULL)
+ return *alias_ptr + strlen (*alias_ptr) + 1;
+ /* No exact match: use the "*" entry if there is one. */
+ if (charset_fallback != NULL)
+ return charset_fallback;
- return cp;
+ return codeset;
}
/* Determine the current locale's character encoding, and canonicalize it
@@ -362,7 +431,6 @@ const char *
locale_charset (void)
{
const char *codeset;
- const char *aliases;
#if !(defined WINDOWS_NATIVE || defined OS2)
@@ -530,16 +598,7 @@ locale_charset (void)
/* The canonical name cannot be determined. */
codeset = "";
- /* Resolve alias. */
- for (aliases = get_charset_aliases ();
- *aliases != '\0';
- aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
- if (strcmp (codeset, aliases) == 0
- || (aliases[0] == '*' && aliases[1] == '\0'))
- {
- codeset = aliases + strlen (aliases) + 1;
- break;
- }
+ codeset = resolve_alias (codeset);
/* Don't return an empty string. GNU libc and GNU libiconv interpret
the empty string as denoting "the locale's character encoding",