it appears that sed won't handle an input line longer than 8k characters, yet no diagnostic is printed and the limit is not mentioned in the man page.
example: for(i in `{seq 3000 3849}) x = ($x IMG_$i.JPG); echo $x | sed 's/IMG_([0-9]+)\.JPG/\1/g' — no error is printed, yet the output is too short and the tail is corrupted (... 3565 IM). i added a diagnostic for an input line that is too long. in trying to test this with cat fileof8192is | sed 's/i/j/g', i found that a single long line can result in at least one "sed: Output line too long." message per character of the input line. i also addressed this problem: now just one "output line too long" message is printed per line. the diff would be shorter if i weren't paranoid that the output buffer could lose its null terminator. i couldn't provoke that situation, so perhaps i just haven't read the source carefully enough. - erik /n/dump/2010/0315/sys/src/cmd/sed.c:161,166 - sed.c:161,167 Rune bad; /* Dummy err ptr reference */ Rune *badp = &bad; + int tlwarn; /* during sub: have warned too long */ char CGMES[] = "%S command garbled: %S"; char TMMES[] = "Too much text: %S"; /n/dump/2010/0315/sys/src/cmd/sed.c:197,202 - sed.c:198,204 char *text(char *); Rune *stext(Rune *, Rune *); int ycomp(SedCom *); + void toolong(void); char * trans(int c); void putline(Biobuf *bp, Rune *buf, int n); void ebputc(Biobufhdr*, int); /n/dump/2010/0315/sys/src/cmd/sed.c:697,705 - sed.c:699,708 int rline(Rune *buf, Rune *end) { - long c; + long c, w; Rune r; + w = 0; while ((c = getrune()) >= 0) { r = c; if (r == '\\') { /n/dump/2010/0315/sys/src/cmd/sed.c:714,719 - sed.c:717,726 } if (buf <= end) *buf++ = r; + else if(w == 0){ + fprint(2, "sed: Input line too long.\n"); + w = 1; + } } *buf = '\0'; return -1; /n/dump/2010/0315/sys/src/cmd/sed.c:1022,1027 - sed.c:1029,1035 * bump to the character after a 0-length match to keep from looping. 
*/ sflag = 1; + tlwarn = 0; if(ipc->gfl == 0) /* single substitution */ dosub(ipc->rhs); else /n/dump/2010/0315/sys/src/cmd/sed.c:1065,1083 - sed.c:1073,1096 errexit(); } } - *sp++ = c; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); + if(sp < &genbuf[LBSIZE]){ + *sp++ = c; + if (sp >= &genbuf[LBSIZE]) + toolong(); + } } lp = loc2; loc2 = sp - genbuf + linebuf; while (*sp++ = *lp++) - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); + if (sp >= &genbuf[LBSIZE]){ + toolong(); + break; + } lp = linebuf; sp = genbuf; while (*lp++ = *sp++) - ; + if (sp >= &genbuf[LBSIZE]) + break; spend = lp - 1; } /n/dump/2010/0315/sys/src/cmd/sed.c:1086,1097 - sed.c:1099,1120 { while (l1 < l2) { *sp++ = *l1++; - if (sp >= &genbuf[LBSIZE]) - fprint(2, "sed: Output line too long.\n"); + if (sp >= &genbuf[LBSIZE]){ + toolong(); + break; + } } return sp; } + void + toolong(void) + { + if(tlwarn == 0) + fprint(2, "sed: Output line too long.\n"); + tlwarn = 1; + } + char * trans(int c) { /n/dump/2010/0315/sys/src/cmd/sed.c:1408,1414 - sed.c:1431,1437 Rune * gline(Rune *addr) { - long c; + long c, w; Rune *p; static long peekc = 0; /n/dump/2010/0315/sys/src/cmd/sed.c:1417,1422 - sed.c:1440,1446 sflag = 0; lnum++; /* Bflush(&fout);********* dumped 4/30/92 - bobf****/ + w = 0; do { p = addr; for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) { /n/dump/2010/0315/sys/src/cmd/sed.c:1426,1433 - sed.c:1450,1463 *p = '\0'; return p; } - if (c && p < lbend) - *p++ = c; + if (c) { + if (p < lbend) + *p++ = c; + else if(w == 0) { + w = 1; + fprint(2, "sed: Input line too long.\n"); + } + } } /* return partial final line, adding implicit newline */ if(p != addr) {