Before the 5.0 lock, I rewrote the sed(1) s/// loop to fix
multiple bugs with respect to zero-length matches.

The patch that went in,

  http://marc.info/?l=openbsd-tech&m=131145325927701&w=2

had to be backed out due to a regression:
when the input did not end in a trailing newline character
and there was an empty match at the end, the committed code
added a spurious '\0' character to the output.

A fix for that was posted, but it was not committed again,
so as not to disrupt the 5.0 release:

  http://marc.info/?l=openbsd-tech&m=131167188124500&w=2

Here is the full patch against -current including the
fix to avoid the regression.

OK?


Index: process.c
===================================================================
RCS file: /cvs/src/usr.bin/sed/process.c,v
retrieving revision 1.17
diff -u -p -r1.17 process.c
--- process.c   26 Jul 2011 08:47:07 -0000      1.17
+++ process.c   17 Sep 2011 08:54:51 -0000
@@ -312,7 +312,7 @@ substitute(struct s_command *cp)
 {
        SPACE tspace;
        regex_t *re;
-       size_t re_off, slen;
+       regoff_t slen;
        int n, lastempty;
        char *s;
 
@@ -333,60 +333,55 @@ substitute(struct s_command *cp)
        n = cp->u.s->n;
        lastempty = 1;
 
-       switch (n) {
-       case 0:                                 /* Global */
-               do {
-                       if (lastempty || match[0].rm_so != match[0].rm_eo) {
-                               /* Locate start of replaced string. */
-                               re_off = match[0].rm_so;
-                               /* Copy leading retained string. */
-                               cspace(&SS, s, re_off, APPEND);
-                               /* Add in regular expression. */
-                               regsub(&SS, s, cp->u.s->new);
-                       }
+       do {
+               /* Copy the leading retained string. */
+               if (n <= 1 && match[0].rm_so)
+                       cspace(&SS, s, match[0].rm_so, APPEND);
 
-                       /* Move past this match. */
-                       if (match[0].rm_so != match[0].rm_eo) {
-                               s += match[0].rm_eo;
-                               slen -= match[0].rm_eo;
-                               lastempty = 0;
+               /* Skip zero-length matches right after other matches. */
+               if (lastempty || match[0].rm_so ||
+                   match[0].rm_so != match[0].rm_eo) {
+                       if (n <= 1) {
+                               /* Want this match: append replacement. */
+                               regsub(&SS, s, cp->u.s->new);
+                               if (n == 1)
+                                       n = -1;
                        } else {
-                               if (match[0].rm_so == 0)
-                                       cspace(&SS, s, match[0].rm_so + 1,
-                                           APPEND);
-                               else
-                                       cspace(&SS, s + match[0].rm_so, 1,
-                                           APPEND);
-                               s += match[0].rm_so + 1;
-                               slen -= match[0].rm_so + 1;
-                               lastempty = 1;
+                               /* Want a later match: append original. */
+                               if (match[0].rm_eo)
+                                       cspace(&SS, s, match[0].rm_eo, APPEND);
+                               n--;
                        }
-               } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
-               /* Copy trailing retained string. */
-               if (slen > 0)
-                       cspace(&SS, s, slen, APPEND);
-               break;
-       default:                                /* Nth occurrence */
-               while (--n) {
-                       s += match[0].rm_eo;
-                       slen -= match[0].rm_eo;
-                       if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
-                               return (0);
                }
-               /* FALLTHROUGH */
-       case 1:                                 /* 1st occurrence */
-               /* Locate start of replaced string. */
-               re_off = match[0].rm_so + (s - ps);
-               /* Copy leading retained string. */
-               cspace(&SS, ps, re_off, APPEND);
-               /* Add in regular expression. */
-               regsub(&SS, s, cp->u.s->new);
-               /* Copy trailing retained string. */
+
+               /* Move past this match. */
                s += match[0].rm_eo;
                slen -= match[0].rm_eo;
+
+               /*
+                * After a zero-length match, advance one byte,
+                * and at the end of the line, terminate.
+                */
+               if (match[0].rm_so == match[0].rm_eo) {
+                       if (*s == '\0' || *s == '\n')
+                               slen = -1;
+                       else
+                               slen--;
+                       if (*s != '\0')
+                               cspace(&SS, s++, 1, APPEND);
+                       lastempty = 1;
+               } else
+                       lastempty = 0;
+
+       } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+
+       /* Did not find the requested number of matches. */
+       if (n > 1)
+               return (0);
+
+       /* Copy the trailing retained string. */
+       if (slen > 0)
                cspace(&SS, s, slen, APPEND);
-               break;
-       }
 
        /*
         * Swap the substitute space and the pattern space, and make sure

Reply via email to