Hi,

Karl Williamson wrote on Mon, Mar 19, 2018 at 11:51:31AM -0600:

> I still believe that in my program the setlocale() returning C for 
> LC_ALL is a bug.  LC_CTYPE should have successfully been set to Romanian 
> UTF-8, and so LC_ALL isn't C.  Instead, it is a combination of C for all 
> the other categories, and UTF-8 for LC_CTYPE.  A return of just "C" 
> doesn't reflect that complexity.   There is a footnote in the ANSI/ISO 
> 9899-1990 C standard that the returned string must support that 
> heterogeneity, and that the return value be able to be used in a future 
> setlocale to get back to the original state.  Your setlocale violates 
> the standard therefore, and harms your portability goal.

So, here is a patch to fix that, together with two other minor bugs
that i found while looking at my code, and two minor style cleanups.

OK?
  Ingo

Rationale:
 * chunk 39, first part (style):
   There is no need for calloc(3) because all _LC_LAST entries
   in newgl will be assigned to right away.
 * chunk 39, second part (main bugfix):
   The problem:
     In the sequence
       setlocale(LC_ALL, "A");
       setlocale(LC_CTYPE, "T");
       setlocale(LC_ALL, NULL);
     the last call returns "A" but ought to return "A/T/A/A/A/A".
   The root cause:
     setlocale(LC_ALL, "A") sets the LC_ALL entry in global_locale,
     but setlocale(LC_CTYPE, "T") neglects to clear it.
   The solution:
     dupgl() must never copy the LC_ALL entry.
     When we are setting a category other than LC_ALL, or when we
     are setting all categories with the LC_ALL, "/////" syntax,
     the old LC_ALL entry becomes invalid.  When we are setting
     LC_ALL uniformly, it will be replaced anyway.
 * chunk 92 (first additonal minor bug):
   If strdup(locname) fails, newgl is leaked.
 * chunk 136 (style):
   Resolve code duplication.
 * chunk 184 (second additional minor bug):
   Consider the nonsensical
     setlocale(LC_ALL, "C/C/fr_FR.UTF-8/C/C/C");
   Of course, it makes no sense to request UTF-8 messages
   in ASCII encoding.  But *if* that is requested, the code finds
   the dot in global_locname and switches the _GlobalRuneLocale
   to UTF-8, which is wrong, as "C" was requested for LC_CTYPE.
   Fix this by looking at the LC_CTYPE entry only when deciding
   what to use for _GlobalRuneLocale.
   There is no risk associated with this fix because we ignore
   LC_MESSAGES etc. anyway, so we know we wont actually try to
   print UTF-8 into an ASCII output stream.


Index: lib/libc/locale/setlocale.c
===================================================================
RCS file: /cvs/src/lib/libc/locale/setlocale.c,v
retrieving revision 1.27
diff -u -p -r1.27 setlocale.c
--- lib/libc/locale/setlocale.c 5 Sep 2017 03:16:13 -0000       1.27
+++ lib/libc/locale/setlocale.c 28 Mar 2018 12:54:42 -0000
@@ -39,11 +39,12 @@ dupgl(char **oldgl)
        char **newgl;
        int ic;
 
-       if ((newgl = calloc(_LC_LAST, sizeof(*newgl))) == NULL) 
+       if ((newgl = reallocarray(NULL, _LC_LAST, sizeof(*newgl))) == NULL)     
                return NULL;
-       for (ic = LC_ALL; ic < _LC_LAST; ic++) {
-               if ((newgl[ic] = strdup(oldgl != NULL ?
-                   oldgl[ic] : ic == LC_ALL ? "" : "C")) == NULL) {
+       newgl[LC_ALL] = NULL;
+       for (ic = 1; ic < _LC_LAST; ic++) {
+               newgl[ic] = strdup(oldgl != NULL ? oldgl[ic] : "C");
+               if (newgl[ic] == NULL) {
                        freegl(newgl);
                        return NULL;
                }
@@ -92,8 +93,10 @@ setlocale(int category, const char *locn
                if (category == LC_ALL && strchr(locname, '/') != NULL) {
 
                        /* One value for each category. */
-                       if ((firstname = strdup(locname)) == NULL)
+                       if ((firstname = strdup(locname)) == NULL) {
+                               freegl(newgl);
                                return NULL;
+                       }
                        nextname = firstname;
                        for (ic = 1; ic < _LC_LAST; ic++)
                                if (nextname == NULL || changegl(ic,
@@ -136,22 +139,14 @@ setlocale(int category, const char *locn
                goto done;
        }
 
-       /* Individual category. */
-       if (category > LC_ALL) {
+       /* Individual category, or LC_ALL uniformly set. */
+       if (category > LC_ALL || newgl[LC_ALL] != NULL) {
                if (strlcpy(global_locname, newgl[category],
                    sizeof(global_locname)) >= sizeof(global_locname))
                        global_locname[0] = '\0';
                goto done;
        }
 
-       /* LC_ALL overrides everything else. */
-       if (newgl[LC_ALL][0] != '\0') {
-               if (strlcpy(global_locname, newgl[LC_ALL],
-                   sizeof(global_locname)) >= sizeof(global_locname))
-                       global_locname[0] = '\0';
-               goto done;
-       }
-
        /*
         * Check whether all categories agree and return either
         * the single common name for all categories or a string
@@ -184,7 +179,7 @@ done:
                        global_locale = newgl;
                        if (category == LC_ALL || category == LC_CTYPE)
                                _GlobalRuneLocale =
-                                   strchr(global_locname, '.') == NULL ?
+                                   strchr(newgl[LC_CTYPE], '.') == NULL ?
                                    &_DefaultRuneLocale : _Utf8RuneLocale;
                } else {
                        freegl(newgl);
Index: regress/lib/libc/locale/setlocale/setlocale.c
===================================================================
RCS file: /cvs/src/regress/lib/libc/locale/setlocale/setlocale.c,v
retrieving revision 1.3
diff -u -p -r1.3 setlocale.c
--- regress/lib/libc/locale/setlocale/setlocale.c       25 Feb 2017 07:28:32 
-0000      1.3
+++ regress/lib/libc/locale/setlocale/setlocale.c       28 Mar 2018 12:54:42 
-0000
@@ -75,7 +75,6 @@ main(int argc, char *argv[])
 
        /* load from env */
        /* NOTE: we don't support non-C locales for some categories */
-       /*test_setlocale("fr_FR.UTF-8", LC_ALL, "");*/ /* set */
        test_setlocale("fr_FR.UTF-8", LC_CTYPE, ""); /* set */
        test_setlocale("fr_FR.UTF-8", LC_MESSAGES, ""); /* set */
        test_MB_CUR_MAX(4);
@@ -113,6 +112,7 @@ main(int argc, char *argv[])
        test_setlocale("C", LC_ALL, "C"); /* reset */
        test_setlocale("invalid.UTF-8", LC_CTYPE, "invalid.UTF-8"); /* set */
        test_setlocale("invalid.UTF-8", LC_CTYPE, NULL);
+       test_setlocale("C/invalid.UTF-8/C/C/C/C", LC_ALL, NULL);
        test_MB_CUR_MAX(4);
 
        /* with invalid codeset (is an error) */

Reply via email to