it appears that sed won't handle an input line longer than 8k characters,
yet no diagnostic is printed, and the limit is not mentioned in the
man page.

example
        for(i in `{seq 3000 3849}) x = ($x IMG_$i.JPG)
        echo $x | sed 's:IMG_([0-9]+)\.JPG:\1:g'
no error is printed, yet the output is too short and its tail
is corrupted (it ends with "... 3565 IM").

i added a diagnostic for an input line that is too long.
while trying to test this with
        cat fileof8192is | sed 's/i/j/g'
i found that a single long line can result in at least one
"sed: Output line too long." message per character of the input
line.  i also addressed this problem: now just one "output line
too long" message is printed per line.

the diff would be shorter if i weren't paranoid
that the output buffer could lose its NUL terminator.
i couldn't provoke that situation, so perhaps i just haven't
read the source carefully enough.

- erik


/n/dump/2010/0315/sys/src/cmd/sed.c:161,166 - sed.c:161,167
  Rune  bad;                            /* Dummy err ptr reference */
  Rune  *badp = &bad;
  
+ int   tlwarn;                         /* during sub: have warned too long */
  
  char  CGMES[]  =      "%S command garbled: %S";
  char  TMMES[]  =      "Too much text: %S";
/n/dump/2010/0315/sys/src/cmd/sed.c:197,202 - sed.c:198,204
  char  *text(char *);
  Rune  *stext(Rune *, Rune *);
  int   ycomp(SedCom *);
+ void  toolong(void);
  char *        trans(int c);
  void  putline(Biobuf *bp, Rune *buf, int n);
  void  ebputc(Biobufhdr*, int);
/n/dump/2010/0315/sys/src/cmd/sed.c:697,705 - sed.c:699,708
  int
  rline(Rune *buf, Rune *end)
  {
-       long c;
+       long c, w;
        Rune r;
  
+       w = 0;
        while ((c = getrune()) >= 0) {
                r = c;
                if (r == '\\') {
/n/dump/2010/0315/sys/src/cmd/sed.c:714,719 - sed.c:717,726
                }
                if (buf <= end)
                        *buf++ = r;
+               else if(w == 0){
+                       fprint(2, "sed: Input line too long.\n");
+                       w = 1;
+               }
        }
        *buf = '\0';
        return -1;
/n/dump/2010/0315/sys/src/cmd/sed.c:1022,1027 - sed.c:1029,1035
         * bump to the character after a 0-length match to keep from looping.
         */
        sflag = 1;
+       tlwarn = 0;
        if(ipc->gfl == 0)                       /* single substitution */
                dosub(ipc->rhs);
        else
/n/dump/2010/0315/sys/src/cmd/sed.c:1065,1083 - sed.c:1073,1096
                                errexit();
                        }
                }
-               *sp++ = c;
-               if (sp >= &genbuf[LBSIZE])
-                       fprint(2, "sed: Output line too long.\n");
+               if(sp < &genbuf[LBSIZE]){
+                       *sp++ = c;
+                       if (sp >= &genbuf[LBSIZE])
+                               toolong();
+               }
        }
        lp = loc2;
        loc2 = sp - genbuf + linebuf;
        while (*sp++ = *lp++)
-               if (sp >= &genbuf[LBSIZE])
-                       fprint(2, "sed: Output line too long.\n");
+               if (sp >= &genbuf[LBSIZE]){
+                       toolong();
+                       break;
+               }
        lp = linebuf;
        sp = genbuf;
        while (*lp++ = *sp++)
-               ;
+               if (sp >= &genbuf[LBSIZE])
+                       break;
        spend = lp - 1;
  }
  
/n/dump/2010/0315/sys/src/cmd/sed.c:1086,1097 - sed.c:1099,1120
  {
        while (l1 < l2) {
                *sp++ = *l1++;
-               if (sp >= &genbuf[LBSIZE])
-                       fprint(2, "sed: Output line too long.\n");
+               if (sp >= &genbuf[LBSIZE]){
+                       toolong();
+                       break;
+               }
        }
        return sp;
  }
  
+ void
+ toolong(void)
+ {
+       if(tlwarn == 0)
+               fprint(2, "sed: Output line too long.\n");
+       tlwarn = 1;
+ }
+ 
  char *
  trans(int c)
  {
/n/dump/2010/0315/sys/src/cmd/sed.c:1408,1414 - sed.c:1431,1437
  Rune *
  gline(Rune *addr)
  {
-       long c;
+       long c, w;
        Rune *p;
        static long peekc = 0;
  
/n/dump/2010/0315/sys/src/cmd/sed.c:1417,1422 - sed.c:1440,1446
        sflag = 0;
        lnum++;
  /*    Bflush(&fout);********* dumped 4/30/92 - bobf****/
+       w = 0;
        do {
                p = addr;
                for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
/n/dump/2010/0315/sys/src/cmd/sed.c:1426,1433 - sed.c:1450,1463
                                *p = '\0';
                                return p;
                        }
-                       if (c && p < lbend)
-                               *p++ = c;
+                       if (c) {
+                               if (p < lbend)
+                                       *p++ = c;
+                               else if(w == 0) {
+                                       w = 1;
+                                       fprint(2, "sed: Input line too long.\n");
+                               }
+                       }
                }
                /* return partial final line, adding implicit newline */
                if(p != addr) {

Reply via email to