martin      99/11/24 14:31:10

  Modified:    src/regex regcomp.c
  Log:
  This patch fixes some of the bogosity in regular expressions
  on EBCDIC-based machines: a character range [a-z] would match much more
  than only the islower() characters because in the EBCDIC charset there
  are "holes in the contiguity" between a-i, j-r and s-z.
  
  This patch fixes [<lowercase>-<lowercase>] and [<uppercase>-<uppercase>]
  ranges by only regarding alphabetic characters between the lower and
  upper bound. (Any other range definition remains unchanged.)
  
  Revision  Changes    Path
  1.10      +37 -0     apache-1.3/src/regex/regcomp.c
  
  Index: regcomp.c
  ===================================================================
  RCS file: /export/home/cvs/apache-1.3/src/regex/regcomp.c,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- regcomp.c 1998/09/15 19:47:45     1.9
  +++ regcomp.c 1999/11/24 22:31:08     1.10
  @@ -100,7 +100,30 @@
   #else
   #    define  GOODFLAGS(f)    ((f)&~REG_DUMP)
   #endif
  +#ifdef CHARSET_EBCDIC /* Added for Apache by <[EMAIL PROTECTED]> */
  +     static int initialized = 0;
   
  +     if (!initialized) {
  +             unsigned ch, idx = 0;
  +             static unsigned char ctlchars_ebcdic[256+1];
  +
  +             for (ch = 1; ch <= 0xFF; ++ch) {
  +                     if (ap_iscntrl(ch)) {
  +                             ctlchars_ebcdic[idx++] = ch;
  +                     }
  +             }
  +             ctlchars_ebcdic[idx++] = '\0'; /* redundant */
  +
  +             for (idx=0; idx < sizeof(cclasses) / sizeof(cclasses[0]); 
++idx) {
  +                     if (strcmp(cclasses[idx].name, "cntrl") == 0) {
  +                             cclasses[idx].chars = ctlchars_ebcdic;
  +                             break;
  +                     }
  +             }
  +             initialized = 1;
  +     }
  +#endif /*CHARSET_EBCDIC*/
  +
        cflags = GOODFLAGS(cflags);
        if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
                return(REG_INVARG);
  @@ -708,8 +731,22 @@
                        finish = start;
   /* xxx what about signed chars here... */
                REQUIRE(start <= finish, REG_ERANGE);
  +#ifndef CHARSET_EBCDIC
                for (i = start; i <= finish; i++)
                        CHadd(cs, i);
  +#else /* Added for Apache by <[EMAIL PROTECTED]> */
  +             /* Special provision for character ranges [a-zA-Z], */
  +             /* which are non-contiguous in EBCDIC: */
  +             if ((ap_isupper(start) && ap_isupper(finish)) ||
  +                 (ap_islower(start) && ap_islower(finish))) {
  +                     for (i = start; i <= finish; i++)
  +                             if (ap_isalpha(i))
  +                                     CHadd(cs, i);
  +             } else {
  +                     for (i = start; i <= finish; i++)
  +                             CHadd(cs, i);
  +             }
  +#endif /*CHARSET_EBCDIC*/
                break;
        }
   }
  
  
  

Reply via email to