POSIX says we should be counting column positions rather than codepoints, but I think that might be rather difficult to get right, and this patch is probably an improvement already.
I know Laslo has studied this area for libgrapheme, so maybe he has suggestions. On 2020-06-20, Richard Ipsum <richardip...@vx21.xyz> wrote: > diff --git a/fold.c b/fold.c > index a5f320b..169064b 100644 > --- a/fold.c > +++ b/fold.c > @@ -7,6 +7,7 @@ > > #include "text.h" > #include "util.h" > +#include "utf.h" > > static int bflag = 0; > static int sflag = 0; > @@ -15,11 +16,14 @@ static size_t width = 80; > static void > foldline(struct line *l) { > size_t i, col, last, spacesect, len; > + Rune r; > + int runelen; > + > + for (i = 0, last = 0, col = 0, spacesect = 0; i < l->len; i += runelen) > { > There's an extraneous blank line here. > - for (i = 0, last = 0, col = 0, spacesect = 0; i < l->len; i++) { > - if (!UTF8_POINT(l->data[i]) && !bflag) > - continue; > if (col >= width && (l->data[i] != '\b' || bflag)) { > + if (bflag && col > width) > + i -= runelen; /* never split a character */ > len = ((sflag && spacesect) ? spacesect : i) - last; > if (fwrite(l->data + last, 1, len, stdout) != len) > eprintf("fwrite <stdout>:"); > @@ -29,6 +33,7 @@ foldline(struct line *l) { > col = 0; > spacesect = 0; > } > + runelen = chartorune(&r, l->data + i); I think we should use charntorune(&r, l->data + i, l->len - i) here, so that decoding is bounded by the bytes remaining in the line. > if (sflag && isspace(l->data[i])) > spacesect = i + 1; > if (!bflag && iscntrl(l->data[i])) { > @@ -46,7 +51,7 @@ foldline(struct line *l) { > break; > } > } else { > - col++; > + col += bflag ? runelen : 1; > } > } > if (l->len - last)