Before the 5.0 lock, I rewrote the sed(1) s/// loop to fix multiple bugs with respect to zero-length matches.
The patch that went in (http://marc.info/?l=openbsd-tech&m=131145325927701&w=2) had to be backed out due to a regression: when the input did not end in a trailing newline character and there was an empty match at the end, the committed code added a spurious '\0' character to the output. A fix for that was posted, but it was not committed, so as not to disrupt the 5.0 release: http://marc.info/?l=openbsd-tech&m=131167188124500&w=2 Here is the full patch against -current, including the fix to avoid the regression. OK? Index: process.c =================================================================== RCS file: /cvs/src/usr.bin/sed/process.c,v retrieving revision 1.17 diff -u -p -r1.17 process.c --- process.c 26 Jul 2011 08:47:07 -0000 1.17 +++ process.c 17 Sep 2011 08:54:51 -0000 @@ -312,7 +312,7 @@ substitute(struct s_command *cp) { SPACE tspace; regex_t *re; - size_t re_off, slen; + regoff_t slen; int n, lastempty; char *s; @@ -333,60 +333,55 @@ substitute(struct s_command *cp) n = cp->u.s->n; lastempty = 1; - switch (n) { - case 0: /* Global */ - do { - if (lastempty || match[0].rm_so != match[0].rm_eo) { - /* Locate start of replaced string. */ - re_off = match[0].rm_so; - /* Copy leading retained string. */ - cspace(&SS, s, re_off, APPEND); - /* Add in regular expression. */ - regsub(&SS, s, cp->u.s->new); - } + do { + /* Copy the leading retained string. */ + if (n <= 1 && match[0].rm_so) + cspace(&SS, s, match[0].rm_so, APPEND); - /* Move past this match. */ - if (match[0].rm_so != match[0].rm_eo) { - s += match[0].rm_eo; - slen -= match[0].rm_eo; - lastempty = 0; + /* Skip zero-length matches right after other matches. */ + if (lastempty || match[0].rm_so || + match[0].rm_so != match[0].rm_eo) { + if (n <= 1) { + /* Want this match: append replacement. 
*/ + regsub(&SS, s, cp->u.s->new); + if (n == 1) + n = -1; } else { - if (match[0].rm_so == 0) - cspace(&SS, s, match[0].rm_so + 1, - APPEND); - else - cspace(&SS, s + match[0].rm_so, 1, - APPEND); - s += match[0].rm_so + 1; - slen -= match[0].rm_so + 1; - lastempty = 1; + /* Want a later match: append original. */ + if (match[0].rm_eo) + cspace(&SS, s, match[0].rm_eo, APPEND); + n--; } - } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); - /* Copy trailing retained string. */ - if (slen > 0) - cspace(&SS, s, slen, APPEND); - break; - default: /* Nth occurrence */ - while (--n) { - s += match[0].rm_eo; - slen -= match[0].rm_eo; - if (!regexec_e(re, s, REG_NOTBOL, 0, slen)) - return (0); } - /* FALLTHROUGH */ - case 1: /* 1st occurrence */ - /* Locate start of replaced string. */ - re_off = match[0].rm_so + (s - ps); - /* Copy leading retained string. */ - cspace(&SS, ps, re_off, APPEND); - /* Add in regular expression. */ - regsub(&SS, s, cp->u.s->new); - /* Copy trailing retained string. */ + + /* Move past this match. */ s += match[0].rm_eo; slen -= match[0].rm_eo; + + /* + * After a zero-length match, advance one byte, + * and at the end of the line, terminate. + */ + if (match[0].rm_so == match[0].rm_eo) { + if (*s == '\0' || *s == '\n') + slen = -1; + else + slen--; + if (*s != '\0') + cspace(&SS, s++, 1, APPEND); + lastempty = 1; + } else + lastempty = 0; + + } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen)); + + /* Did not find the requested number of matches. */ + if (n > 1) + return (0); + + /* Copy the trailing retained string. */ + if (slen > 0) cspace(&SS, s, slen, APPEND); - break; - } /* * Swap the substitute space and the pattern space, and make sure