This patch addresses two problems.

The first one is relatively minor: according to our own manpage, the upper and
lower classes must be sorted, but currently they are not.

The second one is serious:
        tr '[:lower:]' '[:upper:]'
(and vice versa) currently works only if the upper and lower classes
have exactly the same number of elements. When that is not true, as in
many ISO8859-x locales which have a larger number of lowercase letters,
tr may do nasty things. The patch is complex because the whole conversion
string needs to be processed each time an l->u or u->l conversion occurs,
not a single character at a time as in the previous variant.

See this page
http://www.opengroup.org/onlinepubs/007908799/xcu/tr.html
for a detailed description of the desired tr behaviour in such cases.

Please test this patch on your system and locale, and report any strange
things to me.

diff -u ./extern.h /usr/src/usr.bin/tr/extern.h
--- ./extern.h  Fri Jun 14 19:56:52 2002
+++ /usr/src/usr.bin/tr/extern.h        Fri Aug  1 04:19:36 2003
@@ -40,7 +40,8 @@
 
 typedef struct {
        enum { STRING1, STRING2 } which;
-       enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state;
+       enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE,
+              SET, SET_UPPER, SET_LOWER } state;
        int      cnt;                   /* character count */
        int      lastch;                /* last character */
        int     equiv[NCHARS];          /* equivalence set */
@@ -49,3 +50,5 @@
 } STR;
 
 int     next(STR *);
+int charcoll(const void *, const void *);
+
diff -u ./str.c /usr/src/usr.bin/tr/str.c
--- ./str.c     Fri Jul  5 13:28:13 2002
+++ /usr/src/usr.bin/tr/str.c   Fri Aug  1 04:22:11 2003
@@ -106,6 +106,8 @@
                }
                return (1);
        case SET:
+       case SET_UPPER:
+       case SET_LOWER:
                if ((s->lastch = s->set[s->cnt++]) == OOBCH) {
                        s->state = NORMAL;
                        return (next(s));
@@ -194,7 +196,7 @@
 {
        int cnt, (*func)(int);
        CLASS *cp, tmp;
-       int *p;
+       int *p, n;
 
        tmp.name = s->str;
        if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) /
@@ -208,10 +210,18 @@
                if ((func)(cnt))
                        *p++ = cnt;
        *p = OOBCH;
+       n = p - cp->set;
 
        s->cnt = 0;
-       s->state = SET;
        s->set = cp->set;
+       if (strcmp(s->str, "upper") == 0)
+               s->state = SET_UPPER;
+       else if (strcmp(s->str, "lower") == 0) {
+               s->state = SET_LOWER;
+       } else
+               s->state = SET;
+       if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1)
+               mergesort(s->set, n, sizeof(*(s->set)), charcoll);
 }
 
 static int
diff -u ./tr.c /usr/src/usr.bin/tr/tr.c
--- ./tr.c      Thu Sep  5 03:29:07 2002
+++ /usr/src/usr.bin/tr/tr.c    Fri Aug  1 04:32:01 2003
@@ -101,8 +101,9 @@
 STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL };
 
-static int charcoll(const void *, const void *);
 static void setup(int *, char *, STR *, int, int);
+static void process_upper(int);
+static void process_lower(int);
 static void usage(void);
 
 int
@@ -110,7 +111,7 @@
 {
        static int collorder[NCHARS], tmpmap[NCHARS];
        int ch, cnt, lastch, *p;
-       int Cflag, cflag, dflag, sflag, isstring2;
+       int Cflag, cflag, dflag, sflag, isstring2, do_upper, do_lower;
 
        (void)setlocale(LC_ALL, "");
 
@@ -224,19 +225,67 @@
        if (!next(&s2))
                errx(1, "empty string2");
 
-       ch = s2.lastch;
+       do_upper = do_lower = 0;
        /* If string2 runs out of characters, use the last one specified. */
-       if (sflag)
-               while (next(&s1)) {
-                       string1[s1.lastch] = ch = s2.lastch;
-                       string2[ch] = 1;
-                       (void)next(&s2);
-               }
-       else
-               while (next(&s1)) {
-                       string1[s1.lastch] = ch = s2.lastch;
-                       (void)next(&s2);
+       while (next(&s1)) {
+               if (s1.state == SET_LOWER &&
+                   s2.state == SET_UPPER) {
+                       if (do_lower) {
+                               process_lower(sflag);
+                               do_lower = 0;
+                       }
+                       do_upper = 1;
+               } else if (s1.state == SET_UPPER &&
+                          s2.state == SET_LOWER) {
+                       if (do_upper) {
+                               process_upper(sflag);
+                               do_upper = 0;
+                       }
+                       do_lower = 1;
+               } else {
+                       if (do_lower) {
+                               /* Skip until aligned */
+                               if (s1.state == SET_UPPER) {
+                                       do {
+                                               if (!next(&s1))
+                                                       goto endloop;
+                                       } while (s1.state == SET_UPPER);
+                               } else if (s2.state == SET_LOWER) {
+                                       do {
+                                               if (!next(&s2))
+                                                       break;
+                                       } while (s2.state == SET_LOWER);
+                               }
+                               process_lower(sflag);
+                               do_lower = 0;
+                       } else if (do_upper) {
+                               /* Skip until aligned */
+                               if (s1.state == SET_LOWER) {
+                                       do {
+                                               if (!next(&s1))
+                                                       goto endloop;
+                                       } while (s1.state == SET_LOWER);
+                               } else if (s2.state == SET_UPPER) {
+                                       do {
+                                               if (!next(&s2))
+                                                       break;
+                                       } while (s2.state == SET_UPPER);
+                               }
+                               process_upper(sflag);
+                               do_upper = 0;
+                       }
+                       string1[s1.lastch] = s2.lastch;
+                       if (sflag)
+                               string2[s2.lastch] = 1;
                }
+               (void)next(&s2);
+       }
+endloop:
+       if (do_lower)
+               process_lower(sflag);
+       else if (do_upper)
+               process_upper(sflag);
+       /* End of upper & lower special processing */
 
        if (cflag || Cflag) {
                s2.str = argv[1];
@@ -294,15 +343,55 @@
                        string[cnt] = !string[cnt] && ISCHAR(cnt);
 }
 
-static int
+int
 charcoll(const void *a, const void *b)
 {
-       char sa[2], sb[2];
+       static char sa[2], sb[2];
 
        sa[0] = *(const int *)a;
        sb[0] = *(const int *)b;
-       sa[1] = sb[1] = '\0';
        return (strcoll(sa, sb));
+}
+
+
+/*
+ * For -s result will contain only those characters defined
+ * as the second characters in each of the toupper or tolower
+ * pairs.
+ */
+
+static void
+process_upper(int sflag)
+{
+       int cnt, ch;
+
+       for (cnt = 0; cnt < NCHARS; cnt++) {
+               ch = string1[cnt];
+               if (ch == OOBCH)        /* [Cc]flag */
+                       ch = cnt;
+               if (islower(ch)) {
+                       string1[cnt] = ch = toupper(ch);
+                       if (sflag && isupper(ch))
+                               string2[ch] = 1;
+               }
+       }
+}
+
+static void
+process_lower(int sflag)
+{
+       int cnt, ch;
+
+       for (cnt = 0; cnt < NCHARS; cnt++) {
+               ch = string1[cnt];
+               if (ch == OOBCH)        /* [Cc]flag */
+                       ch = cnt;
+               if (isupper(ch)) {
+                       string1[cnt] = ch = tolower(ch);
+                       if (sflag && islower(ch))
+                               string2[ch] = 1;
+               }
+       }
 }
 
 static void
_______________________________________________
[EMAIL PROTECTED] mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "[EMAIL PROTECTED]"

Reply via email to