Author: bapt
Date: Mon May  1 12:42:06 2017
New Revision: 317647
URL: https://svnweb.freebsd.org/changeset/base/317647

Log:
  MFC r317034:
  
  Fix strcoll_l disagreeing with strxfrm by reworking the forward order case in
  wcscoll_l().
  
  Illumos fixed this while grabbing back our patches:
  https://www.illumos.org/rb/r/402/
  
  This does not 100% fix what postgresql folks reported as there is still a
  remaining issue: https://www.illumos.org/issues/7962, it improves the 
situation
  
  The initial issue was reported in postgresql mailing lists:
  
https://www.postgresql.org/message-id/flat/111d0e27-a8f3-4a84-a4e0-b0fb70386...@s24.com#111d0e27-a8f3-4a84-a4e0-b0fb70386...@s24.com
  
  Submitted by: Yuri Pankov <yuri.pan...@nexenta.com>
  Obtained from:        Illumos

Modified:
  stable/11/lib/libc/string/wcscoll.c
  stable/11/lib/libc/tests/string/wcscoll_test.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/lib/libc/string/wcscoll.c
==============================================================================
--- stable/11/lib/libc/string/wcscoll.c Mon May  1 12:41:10 2017        
(r317646)
+++ stable/11/lib/libc/string/wcscoll.c Mon May  1 12:42:06 2017        
(r317647)
@@ -1,5 +1,5 @@
 /*-
- * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
  * Copyright (c) 2002 Tim J. Robbins
  * All rights reserved.
  *
@@ -42,22 +42,22 @@ __FBSDID("$FreeBSD$");
 int
 wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
 {
-       int len1, len2, pri1, pri2, ret;
+       int len1, len2, pri1, pri2;
        wchar_t *tr1 = NULL, *tr2 = NULL;
        int direc, pass;
+       int ret = wcscmp(ws1, ws2);
 
        FIX_LOCALE(locale);
        struct xlocale_collate *table =
                (struct xlocale_collate*)locale->components[XLC_COLLATE];
 
-       if (table->__collate_load_error)
-               /*
-                * Locale has no special collating order or could not be
-                * loaded, do a fast binary comparison.
-                */
-               return (wcscmp(ws1, ws2));
+       if (table->__collate_load_error || ret == 0)
+               return (ret);
 
-       ret = 0;
+       if (*ws1 == 0 && *ws2 != 0)
+               return (-1);
+       if (*ws1 != 0 && *ws2 == 0)
+               return (1);
 
        /*
         * Once upon a time we had code to try to optimize this, but
@@ -77,19 +77,19 @@ wcscoll_l(const wchar_t *ws1, const wcha
                const int32_t *st2 = NULL;
                const wchar_t   *w1 = ws1;
                const wchar_t   *w2 = ws2;
-               int check1, check2;
 
                /* special pass for UNDEFINED */
                if (pass == table->info->directive_count) {
-                       direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+                       direc = DIRECTIVE_FORWARD;
                } else {
                        direc = table->info->directive[pass];
                }
 
                if (direc & DIRECTIVE_BACKWARD) {
                        wchar_t *bp, *fp, c;
+                       free(tr1);
                        if ((tr1 = wcsdup(w1)) == NULL)
-                               goto fail;
+                               goto end;
                        bp = tr1;
                        fp = tr1 + wcslen(tr1) - 1;
                        while (bp < fp) {
@@ -97,8 +97,9 @@ wcscoll_l(const wchar_t *ws1, const wcha
                                *bp++ = *fp;
                                *fp-- = c;
                        }
+                       free(tr2);
                        if ((tr2 = wcsdup(w2)) == NULL)
-                               goto fail;
+                               goto end;
                        bp = tr2;
                        fp = tr2 + wcslen(tr2) - 1;
                        while (bp < fp) {
@@ -111,6 +112,7 @@ wcscoll_l(const wchar_t *ws1, const wcha
                }
 
                if (direc & DIRECTIVE_POSITION) {
+                       int check1, check2;
                        while (*w1 && *w2) {
                                pri1 = pri2 = 0;
                                check1 = check2 = 1;
@@ -120,7 +122,7 @@ wcscoll_l(const wchar_t *ws1, const wcha
                                                    &pri1, pass, &st1);
                                                if (pri1 < 0) {
                                                        errno = EINVAL;
-                                                       goto fail;
+                                                       goto end;
                                                }
                                                if (!pri1) {
                                                        pri1 = 
COLLATE_MAX_PRIORITY;
@@ -133,7 +135,7 @@ wcscoll_l(const wchar_t *ws1, const wcha
                                                    &pri2, pass, &st2);
                                                if (pri2 < 0) {
                                                        errno = EINVAL;
-                                                       goto fail;
+                                                       goto end;
                                                }
                                                if (!pri2) {
                                                        pri2 = 
COLLATE_MAX_PRIORITY;
@@ -149,58 +151,64 @@ wcscoll_l(const wchar_t *ws1, const wcha
                                w1 += len1;
                                w2 += len2;
                        }
+                       if (!*w1) {
+                               if (*w2) {
+                                       ret = -(int)*w2;
+                                       goto end;
+                               }
+                       } else {
+                               ret = *w1;
+                               goto end;
+                       }
                } else {
-                       while (*w1 && *w2) {
-                               pri1 = pri2 = 0;
-                               check1 = check2 = 1;
-                               while ((pri1 == pri2) && (check1 || check2)) {
-                                       while (check1 && *w1) {
-                                               _collate_lookup(table, w1,
-                                                   &len1, &pri1, pass, &st1);
-                                               if (pri1 > 0)
-                                                       break;
-                                               if (pri1 < 0) {
-                                                       errno = EINVAL;
-                                                       goto fail;
-                                               }
-                                               st1 = NULL;
-                                               w1 += 1;
+                       int vpri1 = 0, vpri2 = 0;
+                       while (*w1 || *w2 || st1 || st2) {
+                               pri1 = 1;
+                               while (*w1 || st1) {
+                                       _collate_lookup(table, w1, &len1, &pri1,
+                                           pass, &st1);
+                                       w1 += len1;
+                                       if (pri1 > 0) {
+                                               vpri1++;
+                                               break;
                                        }
-                                       check1 = (st1 != NULL);
-                                       while (check2 && *w2) {
-                                               _collate_lookup(table, w2,
-                                                   &len2, &pri2, pass, &st2);
-                                               if (pri2 > 0)
-                                                       break;
-                                               if (pri2 < 0) {
-                                                       errno = EINVAL;
-                                                       goto fail;
-                                               }
-                                               st2 = NULL;
-                                               w2 += 1;
+
+                                       if (pri1 < 0) {
+                                               errno = EINVAL;
+                                               goto end;
                                        }
-                                       check2 = (st2 != NULL);
-                                       if (!pri1 || !pri2)
+                                       st1 = NULL;
+                               }
+                               pri2 = 1;
+                               while (*w2 || st2) {
+                                       _collate_lookup(table, w2, &len2, &pri2,
+                                           pass, &st2);
+                                       w2 += len2;
+                                       if (pri2 > 0) {
+                                               vpri2++;
                                                break;
+                                       }
+                                       if (pri2 < 0) {
+                                               errno = EINVAL;
+                                               goto end;
+                                       }
+                                       st2 = NULL;
                                }
-                               if (!pri1 || !pri2)
+                               if ((!pri1 || !pri2) && (vpri1 == vpri2))
                                        break;
                                if (pri1 != pri2) {
                                        ret = pri1 - pri2;
                                        goto end;
                                }
-                               w1 += len1;
-                               w2 += len2;
                        }
-               }
-               if (!*w1) {
-                       if (*w2) {
-                               ret = -(int)*w2;
+                       if (vpri1 && !vpri2) {
+                               ret = 1;
+                               goto end;
+                       }
+                       if (!vpri1 && vpri2) {
+                               ret = -1;
                                goto end;
                        }
-               } else {
-                       ret = *w1;
-                       goto end;
                }
        }
        ret = 0;
@@ -210,10 +218,6 @@ end:
        free(tr2);
 
        return (ret);
-
-fail:
-       ret = wcscmp(ws1, ws2);
-       goto end;
 }
 
 int

Modified: stable/11/lib/libc/tests/string/wcscoll_test.c
==============================================================================
--- stable/11/lib/libc/tests/string/wcscoll_test.c      Mon May  1 12:41:10 
2017        (r317646)
+++ stable/11/lib/libc/tests/string/wcscoll_test.c      Mon May  1 12:42:06 
2017        (r317647)
@@ -1,5 +1,7 @@
 /*-
  * Copyright (c) 2016 Baptiste Daroussin <b...@freebsd.org>
+ * Copyright 2016 Tom Lane <t...@sss.pgh.pa.us>
+ * Copyright 2017 Nexenta Systems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -30,6 +32,8 @@ __FBSDID("$FreeBSD$");
 #include <wchar.h>
 #include <locale.h>
 #include <stdlib.h>
+#include <time.h>
+#include <errno.h>
 
 #include <atf-c.h>
 
@@ -55,9 +59,98 @@ ATF_TC_BODY(russian_collation, tc)
            "Bad collation, expected: '%ls' got '%ls'", res, c);
 }
 
+#define        NSTRINGS 2000
+#define        MAXSTRLEN 20
+#define        MAXXFRMLEN (MAXSTRLEN * 20)
+
+typedef struct {
+       char    sval[MAXSTRLEN];
+       char    xval[MAXXFRMLEN];
+} cstr;
+
+ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
+ATF_TC_BODY(strcoll_vs_strxfrm, tc)
+{
+       cstr    data[NSTRINGS];
+       char    *curloc;
+       int     i, j;
+
+       curloc = setlocale(LC_ALL, "en_US.UTF-8");
+       ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
+
+       /* Ensure new random() values on every run */
+       srandom((unsigned int) time(NULL));
+
+       /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
+       for (i = 0; i < NSTRINGS; i++) {
+               char    *p;
+               int     len;
+
+again:
+               p = data[i].sval;
+               len = 1 + (random() % (MAXSTRLEN - 1));
+               while (len > 0) {
+                       int c;
+                       /*
+                        * Generate random printable char in ISO8859-1 range.
+                        * Bias towards producing a lot of spaces.
+                        */
+
+                       if ((random() % 16) < 3) {
+                               c = ' ';
+                       } else {
+                               do {
+                                       c = random() & 0xFF;
+                               } while (!((c >= ' ' && c <= 127) ||
+                                   (c >= 0xA0 && c <= 0xFF)));
+                       }
+
+                       if (c <= 127) {
+                               *p++ = c;
+                               len--;
+                       } else {
+                               if (len < 2)
+                                       break;
+                               /* Poor man's utf8-ification */
+                               *p++ = 0xC0 + (c >> 6);
+                               len--;
+                               *p++ = 0x80 + (c & 0x3F);
+                               len--;
+                       }
+               }
+               *p = '\0';
+               /* strxfrm() each string as we produce it */
+               errno = 0;
+               ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
+                   MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
+                   " string exceeded %d bytes", (int)strlen(data[i].sval),
+                   MAXXFRMLEN);
+
+               /*
+                * Amend strxfrm() failing on certain characters to be fixed and
+                * test later
+                */
+               if (errno != 0)
+                       goto again;
+       }
+
+       for (i = 0; i < NSTRINGS; i++) {
+               for (j = 0; j < NSTRINGS; j++) {
+                       int sr = strcoll(data[i].sval, data[j].sval);
+                       int sx = strcmp(data[i].xval, data[j].xval);
+
+                       ATF_CHECK_MSG(!((sr * sx < 0) ||
+                           (sr * sx == 0 && sr + sx != 0)),
+                           "%s: diff for \"%s\" and \"%s\"",
+                           curloc, data[i].sval, data[j].sval);
+               }
+       }
+}
+
 ATF_TP_ADD_TCS(tp)
 {
        ATF_TP_ADD_TC(tp, russian_collation);
+       ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
 
        return (atf_no_error());
 }
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to