ping

any OKs?

On 05/05/16 00:17, Martijn van Duren wrote:
> Hello tech@,
> 
> On 04/23/16 07:21, Jonathan Gray wrote on bugs@:
>> $ cat foo.sh
>> #!/bin/sh
>>
>> $1 -r '
>> s/[[:space:]]//g
>> s/\<sf([0-9]+),/ISL_SFLOAT@\1,/g
>> '
>> $ cat foo.csv
>> R32G32B32A32_FLOAT          , 128,  1,  1,  1, sf32, sf32, sf32, sf32,     , 
>>     ,    , linear,
>> $ ./foo.sh sed < foo.csv 
>> R32G32B32A32_FLOAT,128,1,1,1,ISL_SFLOAT@32,sf32,ISL_SFLOAT@32,sf32,,,,linear,
>> $ ./foo.sh gsed < foo.csv
>> R32G32B32A32_FLOAT,128,1,1,1,ISL_SFLOAT@32,ISL_SFLOAT@32,ISL_SFLOAT@32,ISL_SFLOAT@32,,,,linear,
>>
>> Encountered with code that was recently added to Mesa:
>>
>> https://cgit.freedesktop.org/mesa/mesa/tree/src/intel/isl/isl_format_layout_gen.bash
>>
> 
> After a long discussion with several other developers I came up with
> the following diff. It fixes the bug and also passes both the
> regression tests and a full bulk build (thanks to ajacoutot@).
> 
> I'd like to post it here for wider exposure. Please test to as much sed
> scripts as you can find. Comments and OKs welcome.
> 
> Diff looks sane to jasper@
> 
> For those interested: The problem comes from the fact that the string
> pointer increments to the end of the previous match and is then called
> with the REG_NOTBOL. The REG_NOTBOL combined with a match at the begin
> of the string causes our regex library to treat the word as not begin of
> word.
> The TRE implementation does the reverse and treats this case as if it
> always is begin of word. This causes a similar bug under MacOS:
> $ echo 'foo foofoo' | sed -E 's/\<foo/bar/g'
> bar barbar
> 
> I've solved this problem by converting sed to use REG_STARTEND more
> explicitly. Although this isn't a POSIX specified flag, it is already
> used by sed and shouldn't be a problem.
> 
> The patch in engine.c from the library is because beginp is set in 
> matcher to start, so this one always results to true.
> 
> I also tried to set up a testcase with gnu regex, but it turned out that
> they don't set re_nsub after regcomp, so the sed compilation phase fails. 
> After examining gnu sed's code I found that it uses an own internal API 
> not specified by POSIX. 
> 
> Index: ./lib/libc/regex/engine.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/regex/engine.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 engine.c
> --- ./lib/libc/regex/engine.c 28 Dec 2015 23:01:22 -0000      1.19
> +++ ./lib/libc/regex/engine.c 2 May 2016 08:50:20 -0000
> @@ -674,7 +674,7 @@ fast(struct match *m, char *start, char 
>       states fresh = m->fresh;
>       states tmp = m->tmp;
>       char *p = start;
> -     int c = (start == m->beginp) ? OUT : *(start-1);
> +     int c = (start == m->offp) ? OUT : *(start-1);
>       int lastc;      /* previous c */
>       int flagch;
>       int i;
> @@ -758,7 +758,7 @@ slow(struct match *m, char *start, char 
>       states empty = m->empty;
>       states tmp = m->tmp;
>       char *p = start;
> -     int c = (start == m->beginp) ? OUT : *(start-1);
> +     int c = (start == m->offp) ? OUT : *(start-1);
>       int lastc;      /* previous c */
>       int flagch;
>       int i;
> Index: ./usr.bin/sed/process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 process.c
> --- ./usr.bin/sed/process.c   26 Oct 2015 14:08:47 -0000      1.27
> +++ ./usr.bin/sed/process.c   2 May 2016 08:50:21 -0000
> @@ -61,7 +61,8 @@ static SPACE HS, PS, SS;
>  static inline int     applies(struct s_command *);
>  static void           flush_appends(void);
>  static void           lputs(char *);
> -static inline int     regexec_e(regex_t *, const char *, int, int, size_t);
> +static inline int     regexec_e(regex_t *, const char *, int, int, size_t,
> +                          size_t);
>  static void           regsub(SPACE *, char *, char *);
>  static int            substitute(struct s_command *);
>  
> @@ -267,7 +268,7 @@ new:              if (!nflag && !pd)
>   * (lastline, linenumber, ps).
>   */
>  #define      MATCH(a)                                                \
> -     (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :       \
> +     (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :    \
>           (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
>  
>  /*
> @@ -335,6 +336,7 @@ substitute(struct s_command *cp)
>       regex_t *re;
>       regoff_t slen;
>       int n, lastempty;
> +     size_t le = 0;
>       char *s;
>  
>       s = ps;
> @@ -346,7 +348,7 @@ substitute(struct s_command *cp)
>                           cp->u.s->maxbref);
>               }
>       }
> -     if (!regexec_e(re, s, 0, 0, psl))
> +     if (!regexec_e(re, s, 0, 0, 0, psl))
>               return (0);
>  
>       SS.len = 0;                             /* Clean substitute space. */
> @@ -356,28 +358,29 @@ substitute(struct s_command *cp)
>  
>       do {
>               /* Copy the leading retained string. */
> -             if (n <= 1 && match[0].rm_so)
> -                     cspace(&SS, s, match[0].rm_so, APPEND);
> +             if (n <= 1 && (match[0].rm_so - le))
> +                     cspace(&SS, s, match[0].rm_so - le, APPEND);
>  
>               /* Skip zero-length matches right after other matches. */
> -             if (lastempty || match[0].rm_so ||
> +             if (lastempty || (match[0].rm_so - le) ||
>                   match[0].rm_so != match[0].rm_eo) {
>                       if (n <= 1) {
>                               /* Want this match: append replacement. */
> -                             regsub(&SS, s, cp->u.s->new);
> +                             regsub(&SS, ps, cp->u.s->new);
>                               if (n == 1)
>                                       n = -1;
>                       } else {
>                               /* Want a later match: append original. */
> -                             if (match[0].rm_eo)
> -                                     cspace(&SS, s, match[0].rm_eo, APPEND);
> +                             if (match[0].rm_eo - le)
> +                                     cspace(&SS, s, match[0].rm_eo - le, 
> APPEND);
>                               n--;
>                       }
>               }
>  
>               /* Move past this match. */
> -             s += match[0].rm_eo;
> -             slen -= match[0].rm_eo;
> +             s += (match[0].rm_eo - le);
> +             slen -= (match[0].rm_eo - le);
> +             le = match[0].rm_eo;
>  
>               /*
>                * After a zero-length match, advance one byte,
> @@ -388,13 +391,15 @@ substitute(struct s_command *cp)
>                               slen = -1;
>                       else
>                               slen--;
> -                     if (*s != '\0')
> +                     if (*s != '\0') {
>                               cspace(&SS, s++, 1, APPEND);
> +                             le++;
> +                     }
>                       lastempty = 1;
>               } else
>                       lastempty = 0;
>  
> -     } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
> +     } while (n >= 0 && slen >= 0 && regexec_e(re, ps, 0, 0, le, psl));
>  
>       /* Did not find the requested number of matches. */
>       if (n > 1)
> @@ -509,7 +514,7 @@ lputs(char *s)
>  
>  static inline int
>  regexec_e(regex_t *preg, const char *string, int eflags,
> -    int nomatch, size_t slen)
> +    int nomatch, size_t start, size_t stop)
>  {
>       int eval;
>  
> @@ -520,8 +525,8 @@ regexec_e(regex_t *preg, const char *str
>               defpreg = preg;
>  
>       /* Set anchors */
> -     match[0].rm_so = 0;
> -     match[0].rm_eo = slen;
> +     match[0].rm_so = start;
> +     match[0].rm_eo = stop;
>  
>       eval = regexec(defpreg, string,
>           nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
> 

Reply via email to