Hello,
This patch adds UTF-8 editing support to ksh's emacs and vi modes
and fixes some UTF-8 encoding issues.
Is it OK?
Index: bin/ksh/emacs.c
===================================================================
RCS file: /cvs/src/bin/ksh/emacs.c,v
retrieving revision 1.89
diff -u -p -u -r1.89 emacs.c
--- bin/ksh/emacs.c 9 Oct 2021 21:38:00 -0000 1.89
+++ bin/ksh/emacs.c 12 Aug 2022 01:19:52 -0000
@@ -21,6 +21,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#ifndef SMALL
# include <term.h>
# include <curses.h>
@@ -102,6 +103,7 @@ static int x_adj_done;
static int xx_cols;
static int x_col;
static int x_displen;
+static int x_edp_len;
static int x_arg; /* general purpose arg */
static int x_arg_defaulted;/* x_arg not explicitly set; defaulted to 1 */
@@ -133,6 +135,7 @@ static int x_size_str(char *);
static int x_size(int);
static void x_zots(char *);
static void x_zotc(int);
+static void x_zotu8c(const char *, int);
static void x_load_hist(char **);
static int x_search(char *, int, int);
static int x_match(char *, char *);
@@ -143,12 +146,15 @@ static void x_e_ungetc(int);
static int x_e_getc(void);
static int x_e_getu8(char *, int);
static void x_e_putc(int);
+static void x_e_putu8c(const char *, int);
static void x_e_puts(const char *);
static int x_comment(int);
static int x_fold_case(int);
static char *x_lastcp(void);
static void do_complete(int, Comp_type);
static int isu8cont(unsigned char);
+static int u8code(const char *);
+static int u8mblen(unsigned char);
/* proto's for keybindings */
static int x_abort(int);
@@ -276,6 +282,54 @@ isu8cont(unsigned char c)
}
int
+u8code(const char *str)
+{
+	/*
+	 * Decode the UTF-8 sequence at str into a code point.  A byte that
+	 * starts no valid sequence, or a sequence cut short, yields the
+	 * first byte itself, so bytes past a terminating NUL are never read.
+	 */
+	const unsigned char *p = (const unsigned char *)str;
+	int wc, i, len;
+
+	if ((*p & 0xf8) == 0xf0 && *p < 0xf5) {
+		wc = *p & 0x07;
+		len = 4;
+	} else if ((*p & 0xf0) == 0xe0) {
+		wc = *p & 0x0f;
+		len = 3;
+	} else if ((*p & 0xe0) == 0xc0 && *p > 0xc1) {
+		wc = *p & 0x1f;
+		len = 2;
+	} else
+		return *p;
+
+	for (i = 1; i < len; i++) {
+		if ((p[i] & 0xc0) != 0x80)	/* not a continuation byte */
+			return *p;
+		wc = (wc << 6) | (p[i] & 0x3f);
+	}
+	return wc;
+}
+
+int
+u8mblen(unsigned char c)
+{
+	/*
+	 * Byte length of the UTF-8 sequence that lead byte c announces;
+	 * continuation bytes and invalid lead bytes count as one byte.
+	 */
+	if (c >= 0xf0 && c <= 0xf4)
+		return 4;
+	if (c >= 0xe0 && c <= 0xef)
+		return 3;
+	if (c >= 0xc2 && c <= 0xdf)
+		return 2;
+
+	return 1;
+}
+
+int
x_emacs(char *buf, size_t len)
{
struct kb_entry *k, *kmatch = NULL;
@@ -449,6 +503,7 @@ x_ins(char *s)
{
char *cp = xcp;
int adj = x_adj_done;
+ int wc;
if (x_do_ins(s, strlen(s)) < 0)
return -1;
@@ -462,8 +517,12 @@ x_ins(char *s)
x_zots(cp);
if (adj == x_adj_done) { /* has x_adjust() been called? */
/* no */
- for (cp = xlp; cp > xcp; )
- x_bs(*--cp);
+ for (cp = xlp; cp > xcp; ) {
+ if (isu8cont(*--cp))
+ continue;
+ wc = u8code(cp);
+ x_bs(wc);
+ }
}
x_adj_ok = 1;
@@ -511,6 +570,7 @@ x_delete(int nc, int push)
{
int i,j;
char *cp;
+ int len, wc;
if (nc == 0)
return;
@@ -531,8 +591,12 @@ x_delete(int nc, int push)
cp = xcp;
j = 0;
i = nc;
- while (i--) {
- j += x_size((unsigned char)*cp++);
+ while (i) {
+ wc = u8code(cp);
+ len = u8mblen(*cp);
+ j += x_size(wc);
+ cp += len;
+ i -= len;
}
memmove(xcp, xcp+nc, xep - xcp + 1); /* Copies the null */
x_adj_ok = 0; /* don't redraw */
@@ -555,8 +619,12 @@ x_delete(int nc, int push)
/*x_goto(xcp);*/
x_adj_ok = 1;
xlp_valid = false;
- for (cp = x_lastcp(); cp > xcp; )
- x_bs(*--cp);
+ for (cp = x_lastcp(); cp > xcp; ) {
+ if (isu8cont(*--cp))
+ continue;
+ wc = u8code(cp);
+ x_bs(wc);
+ }
return;
}
@@ -639,16 +707,34 @@ x_fword(void)
static void
x_goto(char *cp)
{
- if (cp < xbp || cp >= (xbp + x_displen)) {
+ char *ecp;
+ int i, len, wc;
+
+ for (i = 0, ecp = xbp; ecp < xep && i < x_displen; ecp += len) {
+ wc = u8code(ecp);
+ len = u8mblen(*ecp);
+ if (i + x_size(wc) >= x_displen)
+ break;
+ i += x_size(wc);
+ }
+
+ if (cp < xbp || cp >= ecp) {
/* we are heading off screen */
xcp = cp;
x_adjust();
} else if (cp < xcp) { /* move back */
- while (cp < xcp)
- x_bs((unsigned char)*--xcp);
+ while (cp < xcp) {
+ if (isu8cont(*--xcp))
+ continue;
+ wc = u8code(xcp);
+ x_bs(wc);
+ }
} else if (cp > xcp) { /* move forward */
- while (cp > xcp)
- x_zotc((unsigned char)*xcp++);
+ while (cp > xcp) {
+ len = u8mblen(*xcp);
+ x_zotu8c(xcp, len);
+ xcp += len;
+ }
}
}
@@ -666,27 +752,37 @@ static int
x_size_str(char *cp)
{
int size = 0;
- while (*cp)
- size += x_size(*cp++);
+ int len, wc;
+ while (*cp) {
+ wc = u8code(cp);
+ len = u8mblen(*cp);
+ size += x_size(wc);
+ cp += len;
+ }
return size;
}
static int
x_size(int c)
{
+ int w;
+ if (c=='\b')
+ return -1;
if (c=='\t')
return 4; /* Kludge, tabs are always four spaces. */
- if (iscntrl(c)) /* control char */
+ if (c < 255 && iscntrl(c)) /* control char */
return 2;
- if (isu8cont(c))
- return 0;
- return 1;
+ w = wcwidth(c);
+ if (w == -1)
+ return 1;
+ return w;
}
static void
x_zots(char *str)
{
int adj = x_adj_done;
+ int len;
if (str > xbuf && isu8cont(*str)) {
while (str > xbuf && isu8cont(*str))
@@ -694,8 +790,17 @@ x_zots(char *str)
x_e_putc('\b');
}
x_lastcp();
- while (*str && str < xlp && adj == x_adj_done)
- x_zotc(*str++);
+ if (str >= xlp && x_edp_len != 0 && adj == x_adj_done) {
+ len = u8mblen(*str);
+ x_zotu8c(str, len);
+ str += len;
+ }
+
+ while (*str && str < xlp && adj == x_adj_done) {
+ len = u8mblen(*str);
+ x_zotu8c(str, len);
+ str += len;
+ }
}
static void
@@ -711,6 +816,15 @@ x_zotc(int c)
x_e_putc(c);
}
+static void
+x_zotu8c(const char *str, int len)
+{
+	if (len == 1)	/* single byte: pass its value, not a signed char */
+		x_zotc((unsigned char)*str);
+	else		/* multibyte character: handed on unchanged */
+		x_e_putu8c(str, len);
+}
+
static int
x_mv_back(int c)
{
@@ -1032,6 +1146,7 @@ x_redraw(int limit)
{
int i, j, truncate = 0;
char *cp;
+ int len, wc;
x_adj_ok = 0;
if (limit == -2) {
@@ -1071,9 +1186,15 @@ x_redraw(int limit)
limit = xx_cols;
if (limit >= 0) {
if (xep > xlp)
- i = 0; /* we fill the line */
- else
- i = limit - (xlp - xbp);
+ i = x_edp_len; /* we fill the line */
+ else {
+ i = limit;
+ for (cp = xbp; cp < xlp; cp += len) {
+ wc = u8code(cp);
+ len = u8mblen(*cp);
+ i -= x_size(wc);
+ }
+ }
for (j = 0; j < i && x_col < (xx_cols - 2); j++)
x_e_putc(' ');
@@ -1090,8 +1211,12 @@ x_redraw(int limit)
while (j--)
x_e_putc('\b');
}
- for (cp = xlp; cp > xcp; )
- x_bs(*--cp);
+ for (cp = xcp; cp < xlp;) {
+ wc = u8code(cp);
+ len = u8mblen(*cp);
+ x_bs(wc);
+ cp += len;
+ }
x_adj_ok = 1;
#ifdef DEBUG
x_flush();
@@ -1102,7 +1227,10 @@ x_redraw(int limit)
static int
x_transpose(int c)
{
- char tmp;
+ char tmpu8[4];
+ int blen;
+ int bpos, bbpos;
+ int i;
/* What transpose is meant to do seems to be up for debate. This
* is a general summary of the options; the text is abcd with the
@@ -1128,25 +1256,41 @@ x_transpose(int c)
/* Gosling/Unipress emacs style: Swap two characters before the
* cursor, do not change cursor position
*/
- x_bs(xcp[-1]);
- x_bs(xcp[-2]);
- x_zotc(xcp[-1]);
- x_zotc(xcp[-2]);
- tmp = xcp[-1];
- xcp[-1] = xcp[-2];
- xcp[-2] = tmp;
+ bpos = -1;
+ while (isu8cont(xcp[bpos]))
+ bpos--;
+ bbpos = bpos - 1;
+ while (isu8cont(xcp[bbpos]))
+ bbpos--;
+ x_bs(u8code(xcp + bpos));
+ x_bs(u8code(xcp + bbpos));
+ blen = u8mblen(xcp[bpos]);
+ x_zotu8c(xcp + bpos, blen);
+ i = u8mblen(xcp[bbpos]);
+ x_zotu8c(xcp + bbpos, i);
+ memmove(tmpu8, xcp + bbpos, i);
+ memmove(xcp + bbpos, xcp + bpos, blen);
+ memmove(xcp + bbpos + blen, tmpu8, i);
} else {
/* GNU emacs style: Swap the characters before and under the
* cursor, move cursor position along one.
*/
- x_bs(xcp[-1]);
- x_zotc(xcp[0]);
- x_zotc(xcp[-1]);
- tmp = xcp[-1];
- xcp[-1] = xcp[0];
- xcp[0] = tmp;
- x_bs(xcp[0]);
- x_goto(xcp + 1);
+ bpos = -1;
+ while (isu8cont(xcp[bpos]))
+ bpos--;
+ x_bs(u8code(xcp + bpos));
+ blen = u8mblen(xcp[0]);
+ x_zotu8c(xcp, blen);
+ i = u8mblen(xcp[bpos]);
+ x_zotu8c(xcp + bpos, i);
+
+ memmove(tmpu8, xcp + bpos, i);
+ memmove(xcp + bpos, xcp, blen);
+ memmove(xcp + bpos + blen, tmpu8, i);
+
+ if (blen >= i)
+ x_bs(u8code(xcp + bpos + blen));
+ x_goto(xcp + bpos + blen + i);
}
return KSTD;
}
@@ -1802,12 +1946,22 @@ do_complete(int flags, /* XCF_{COMMAND,F
static void
x_adjust(void)
{
+ int i;
+ int wc;
+
x_adj_done++; /* flag the fact that we were called. */
/*
* we had a problem if the prompt length > xx_cols / 2
*/
- if ((xbp = xcp - (x_displen / 2)) < xbuf)
- xbp = xbuf;
+ for (i = 0, xbp = xcp; xbp > xbuf && i < (x_displen / 2); xbp--) {
+ if (isu8cont(*xbp))
+ continue;
+
+ wc = u8code(xbp);
+ if (i + x_size(wc) >= (x_displen / 2))
+ break;
+ i += x_size(wc);
+ }
xlp_valid = false;
x_redraw(xx_cols);
x_flush();
@@ -1911,6 +2065,24 @@ x_e_putc(int c)
x_adjust();
}
+static void
+x_e_putu8c(const char *str, int len)
+{
+	int w = x_size(u8code(str));	/* columns the character occupies */
+	int i;
+
+	/* Write the bytes only if the whole character fits on the line. */
+	if (x_col + w <= xx_cols) {
+		for (i = 0; i < len; i++)
+			x_putc(str[i]);
+		x_col += w;
+	}
+
+	/* Let x_adjust() step in once x_col nears the right margin. */
+	if (x_adj_ok && (x_col < 0 || x_col >= xx_cols - 1 - w))
+		x_adjust();
+}
+
#ifdef DEBUG
static int
x_debug_info(int c)
@@ -1933,9 +2105,16 @@ static void
x_e_puts(const char *s)
{
int adj = x_adj_done;
+ int len;
- while (*s && adj == x_adj_done)
- x_e_putc(*s++);
+ while (*s && adj == x_adj_done) {
+ len = u8mblen(*s);
+ if (len == 1)
+ x_e_putc(*s);
+ else
+ x_e_putu8c(s, len);
+ s += len;
+ }
}
/* NAME:
@@ -2156,10 +2335,22 @@ x_lastcp(void)
{
char *rcp;
int i;
+ int len;
+ int wc;
if (!xlp_valid) {
- for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp++)
- i += x_size((unsigned char)*rcp);
+ x_edp_len = 0;
+ for (i = 0, rcp = xbp; rcp < xep && i < x_displen; rcp += len) {
+ len = u8mblen(*rcp);
+ wc = u8code(rcp);
+
+ if (i + x_size(wc) > x_displen) {
+ x_edp_len = x_displen - i;
+ break;
+ }
+ if (rcp + len <= xep)
+ i += x_size(wc);
+ }
xlp = rcp;
}
xlp_valid = true;
Index: bin/ksh/main.c
===================================================================
RCS file: /cvs/src/bin/ksh/main.c,v
retrieving revision 1.98
diff -u -p -u -r1.98 main.c
--- bin/ksh/main.c 28 Jun 2019 13:34:59 -0000 1.98
+++ bin/ksh/main.c 12 Aug 2022 01:19:52 -0000
@@ -8,6 +8,7 @@
#include <errno.h>
#include <fcntl.h>
+#include <locale.h>
#include <paths.h>
#include <pwd.h>
#include <stdio.h>
@@ -145,6 +146,8 @@ main(int argc, char *argv[])
pid_t ppid;
kshname = argv[0];
+
+ setlocale(LC_CTYPE, "");
if (issetugid()) { /* could later drop privileges */
if (pledge("stdio rpath wpath cpath fattr flock getpw proc "
Index: bin/ksh/vi.c
===================================================================
RCS file: /cvs/src/bin/ksh/vi.c,v
retrieving revision 1.60
diff -u -p -u -r1.60 vi.c
--- bin/ksh/vi.c 12 Mar 2021 02:10:25 -0000 1.60
+++ bin/ksh/vi.c 12 Aug 2022 01:19:53 -0000
@@ -14,6 +14,7 @@
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#ifndef SMALL
# include <term.h>
# include <curses.h>
@@ -73,8 +74,14 @@ static void x_vi_zotc(int);
static void vi_pprompt(int);
static void vi_error(void);
static void vi_macro_reset(void);
+static void x_vi_ungetc(int);
+static int x_vi_getc(void);
+static int x_vi_getu8(char, char *, int);
static int x_vi_putbuf(const char *, size_t);
static int isu8cont(unsigned char);
+static int u8code(const char *);
+static int u8mblen(unsigned char);
+static int u8width(int);
#define C_ 0x1 /* a valid command that isn't a M_, E_, U_ */
#define M_ 0x2 /* movement command (h, l, etc.) */
@@ -213,10 +220,10 @@ x_vi(char *buf, size_t len)
continue;
/* must be the end of all the macros */
vi_macro_reset();
- c = x_getc();
+ c = x_vi_getc();
}
} else
- c = x_getc();
+ c = x_vi_getc();
if (c == -1)
break;
@@ -260,6 +267,8 @@ vi_hook(int ch)
{
static char curcmd[MAXVICMD], locpat[SRCHLEN];
static int cmdlen, argc1, argc2;
+ int i, len;
+ char u8c[4];
switch (state) {
@@ -314,8 +323,11 @@ vi_hook(int ch)
if (is_bad(ch)) {
del_range(es->cursor, es->cursor + 1);
vi_error();
- } else
- es->cbuf[es->cursor++] = ch;
+ } else {
+ len = x_vi_getu8(ch, es->cbuf, es->cursor);
+ es->cursor += len;
+ es->linelen += len - 1;
+ }
refresh_line(1);
state = VNORMAL;
break;
@@ -412,7 +424,6 @@ vi_hook(int ch)
return 0;
} else if (ch == edchars.werase) {
struct edstate new_es, *save_es;
- int i;
int n = srchlen;
new_es.cursor = n;
@@ -430,10 +441,16 @@ vi_hook(int ch)
refresh_line(0);
return 0;
} else {
- if (srchlen == SRCHLEN - 1)
+ if (srchlen >= SRCHLEN - 1)
vi_error();
else {
- locpat[srchlen++] = ch;
+ len = x_vi_getu8(ch, u8c, 0);
+ for (i = 0; i < len; i++) {
+ locpat[srchlen++] = u8c[i];
+ if (srchlen >= SRCHLEN - 1)
+ vi_error();
+ }
+
if ((ch & 0x80) && Flag(FVISHOW8)) {
if (es->linelen + 2 > es->cbufsize)
vi_error();
@@ -449,7 +466,8 @@ vi_hook(int ch)
} else {
if (es->linelen >= es->cbufsize)
vi_error();
- es->cbuf[es->linelen++] = ch;
+ for (i = 0; i < len; i++)
+ es->cbuf[es->linelen++] = u8c[i];
}
es->cursor = es->linelen;
refresh_line(0);
@@ -537,7 +555,9 @@ vi_reset(char *buf, size_t len)
static int
nextstate(int ch)
{
- if (is_extend(ch))
+ if (ch & 0x80)
+ return VFAIL;
+ else if (is_extend(ch))
return VEXTCMD;
else if (is_srch(ch))
return VSEARCH;
@@ -555,6 +575,9 @@ static int
vi_insert(int ch)
{
int tcursor;
+ int i, len;
+ char u8c[4];
+
if (ch == edchars.erase || ch == CTRL('h')) {
if (insert == REPLACE) {
@@ -674,17 +697,21 @@ vi_insert(int ch)
/* End nonstandard vi commands } */
default:
- if (es->linelen >= es->cbufsize - 1)
+ len = x_vi_getu8(ch, u8c, 0);
+
+ if (es->linelen >= es->cbufsize - len)
return -1;
- ibuf[inslen++] = ch;
+ for (i = 0; i < len; i++)
+ ibuf[inslen++] = u8c[i];
if (insert == INSERT) {
- memmove(&es->cbuf[es->cursor+1], &es->cbuf[es->cursor],
+ memmove(&es->cbuf[es->cursor+len], &es->cbuf[es->cursor],
es->linelen - es->cursor);
- es->linelen++;
+ es->linelen += len;
}
- es->cbuf[es->cursor++] = ch;
+ for (i = 0; i < len; i++)
+ es->cbuf[es->cursor++] = u8c[i];
if (insert == REPLACE && es->cursor > es->linelen)
- es->linelen++;
+ es->linelen += len;
expanded = NONE;
}
return 0;
@@ -696,6 +723,8 @@ vi_cmd(int argcnt, const char *cmd)
int ncursor;
int cur, c1, c2, c3 = 0;
int any;
+ int len;
+ char u8c[4];
struct edstate *t;
if (argcnt == 0 && !is_zerocount(*cmd))
@@ -769,7 +798,7 @@ vi_cmd(int argcnt, const char *cmd)
case 'a':
modified = 1; hnum = hlast;
if (es->linelen != 0)
- while (isu8cont(es->cbuf[++es->cursor]))
+ while (isu8cont(es->cbuf[++es->cursor]) && es->cursor < es->linelen) /* keep the cursor from running past linelen */
continue;
insert = INSERT;
break;
@@ -835,11 +864,11 @@ vi_cmd(int argcnt, const char *cmd)
case 'p':
modified = 1; hnum = hlast;
if (es->linelen != 0)
- es->cursor++;
+ es->cursor += u8mblen(es->cbuf[es->cursor]);
while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0)
;
- if (es->cursor != 0)
- es->cursor--;
+ while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+ ;
if (argcnt != 0)
return -1;
break;
@@ -849,8 +878,8 @@ vi_cmd(int argcnt, const char *cmd)
any = 0;
while (putbuf(ybuf, yanklen, 0) == 0 && --argcnt > 0)
any = 1;
- if (any && es->cursor != 0)
- es->cursor--;
+ while (any && es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+ ;
if (argcnt != 0)
return -1;
break;
@@ -864,8 +893,8 @@ vi_cmd(int argcnt, const char *cmd)
case 'D':
yank_range(es->cursor, es->linelen);
del_range(es->cursor, es->linelen);
- if (es->cursor != 0)
- es->cursor--;
+ while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+ ;
break;
case 'g':
@@ -937,8 +966,9 @@ vi_cmd(int argcnt, const char *cmd)
return -1;
del_range(es->cursor, cur);
+ len = x_vi_getu8(cmd[1], u8c, 0);
while (argcnt-- > 0)
- putbuf(&cmd[1], 1, 0);
+ putbuf(u8c, len, 0);
while (es->cursor > 0)
if (!isu8cont(es->cbuf[--es->cursor]))
break;
@@ -1090,7 +1120,7 @@ vi_cmd(int argcnt, const char *cmd)
}
modified = 1; hnum = hlast;
if (es->cursor != es->linelen)
- es->cursor++;
+ es->cursor += u8mblen(es->cbuf[es->cursor]);
while (*p && !issp(*p)) {
argcnt++;
p++;
@@ -1100,8 +1130,8 @@ vi_cmd(int argcnt, const char *cmd)
else if (putbuf(sp, argcnt, 0) != 0)
argcnt = -1;
if (argcnt < 0) {
- if (es->cursor != 0)
- es->cursor--;
+ while (es->cursor != 0 && isu8cont(es->cbuf[--es->cursor]))
+ ;
return -1;
}
insert = INSERT;
@@ -1125,8 +1155,8 @@ vi_cmd(int argcnt, const char *cmd)
modified = 1; hnum = hlast;
*p = tolower(c);
}
- if (es->cursor < es->linelen - 1)
- es->cursor++;
+ if (es->cursor < es->linelen - u8mblen(es->cbuf[es->cursor]))
+ es->cursor += u8mblen(es->cbuf[es->cursor]);
}
break;
}
@@ -1218,7 +1248,7 @@ domove(int argcnt, const char *cmd, int
if ((ncursor = findch(fsavech, argcnt, t, i)) < 0)
return -1;
if (sub && t)
- ncursor++;
+ ncursor += u8mblen(es->cbuf[ncursor]);
break;
case 'h':
@@ -1277,16 +1307,16 @@ domove(int argcnt, const char *cmd, int
ncursor = es->cursor;
while (ncursor < es->linelen &&
(i = bracktype(es->cbuf[ncursor])) == 0)
- ncursor++;
+ ncursor += u8mblen(es->cbuf[ncursor]);
if (ncursor == es->linelen)
return -1;
bcount = 1;
do {
if (i > 0) {
- if (++ncursor >= es->linelen)
+ if ((ncursor += u8mblen(es->cbuf[ncursor])) >= es->linelen)
return -1;
} else {
- if (--ncursor < 0)
+ if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0)
return -1;
}
t = bracktype(es->cbuf[ncursor]);
@@ -1296,7 +1326,7 @@ domove(int argcnt, const char *cmd, int
bcount--;
} while (bcount != 0);
if (sub && i > 0)
- ncursor++;
+ ncursor += u8mblen(es->cbuf[ncursor]);
break;
default:
@@ -1444,8 +1474,8 @@ edit_reset(char *buf, size_t len)
pwidth -= prompt_trunc;
} else
prompt_trunc = 0;
- if (!wbuf_len || wbuf_len != x_cols - 3) {
- wbuf_len = x_cols - 3;
+ if (!wbuf_len || wbuf_len != (x_cols - 3) * 4) { /* A UTF-8 character is at most 4 bytes long. */
+ wbuf_len = (x_cols - 3) * 4;
wbuf[0] = aresize(wbuf[0], wbuf_len, APERM);
wbuf[1] = aresize(wbuf[1], wbuf_len, APERM);
}
@@ -1457,6 +1487,70 @@ edit_reset(char *buf, size_t len)
holdlen = 0;
}
+static int unget_char = -1;	/* pushed-back byte, or -1 when empty */
+
+static void
+x_vi_ungetc(int c)
+{
+	unget_char = c;		/* one slot only: replaces an earlier byte */
+}
+
+static int
+x_vi_getc(void)
+{
+	int c = unget_char;
+
+	/* Nothing pushed back: fetch the next byte with x_getc(). */
+	if (c == -1)
+		return x_getc();
+
+	/* Hand out the pushed-back byte and empty the slot. */
+	unget_char = -1;
+	return c;
+}
+
+static int
+x_vi_getu8(char firstch, char *buf, int off)
+{
+	/*
+	 * Collect one character that starts with firstch, reading further
+	 * bytes with x_vi_getc() as needed, and store it at buf + off.
+	 * Returns the number of bytes stored.  Returns 0, leaving buf alone,
+	 * when firstch is a continuation byte, when input ends early, or
+	 * when the next byte cannot continue the sequence (that byte is
+	 * pushed back with x_vi_ungetc()).
+	 */
+	int c, cc, i, len;
+	char u8c[4];
+
+	/*
+	 * Convert through unsigned char: where plain char is signed, a
+	 * lead byte would be negative and fail the 0xc1/0xe0/0xed/0xf4 tests.
+	 */
+	c = (unsigned char)firstch;
+	if (isu8cont(c))
+		return 0;
+	u8c[0] = c;
+
+	/* u8mblen() returns 1 under FVISHOW8, so each byte stands alone. */
+	len = u8mblen(c);
+	for (i = 1; i < len; i++) {
+		if ((cc = x_vi_getc()) == -1)
+			return 0;
+		/* Reject overlong forms, surrogates and values > U+10FFFF. */
+		if (isu8cont(cc) == 0 || (i == 1 &&
+		    ((c == 0xe0 && cc < 0xa0) || (c == 0xed && (cc & 0x20)) ||
+		    (c == 0xf0 && cc < 0x90) || (c == 0xf4 && (cc & 0x30))))) {
+			x_vi_ungetc(cc);
+			return 0;
+		}
+		u8c[i] = cc;
+	}
+
+	memcpy(buf + off, u8c, len);
+	return len;
+}
+
/*
* this is used for calling x_escape() in complete_word()
*/
@@ -1500,26 +1594,29 @@ static int
findch(int ch, int cnt, int forw, int incl)
{
int ncursor;
+ char u8c[4];
+ int len;
if (es->linelen == 0)
return -1;
ncursor = es->cursor;
+ len = x_vi_getu8(ch, u8c, 0);
while (cnt--) {
do {
if (forw) {
- if (++ncursor == es->linelen)
+ if ((ncursor += u8mblen(es->cbuf[ncursor])) == es->linelen)
return -1;
} else {
- if (--ncursor < 0)
+ if ((ncursor -= u8mblen(es->cbuf[ncursor])) < 0)
return -1;
}
- } while (es->cbuf[ncursor] != ch);
+ } while (memcmp(es->cbuf + ncursor, u8c, len));
}
if (!incl) {
if (forw)
- ncursor--;
+ while (ncursor > 0 && isu8cont(es->cbuf[--ncursor]));
else
- ncursor++;
+ ncursor += u8mblen(es->cbuf[ncursor]);
}
return ncursor;
}
@@ -1774,13 +1871,17 @@ static int
outofwin(void)
{
int cur, col;
+ int wc;
if (es->cursor < es->winleft)
return 1;
col = 0;
cur = es->winleft;
- while (cur < es->cursor)
- col = newcol((unsigned char) es->cbuf[cur++], col);
+ while (cur < es->cursor) {
+ wc = u8code(&es->cbuf[cur]);
+ col = newcol(wc, col);
+ cur += u8mblen(es->cbuf[cur]);
+ }
if (col >= winwidth)
return 1;
return 0;
@@ -1792,6 +1893,7 @@ rewindow(void)
int tcur, tcol;
int holdcur1, holdcol1;
int holdcur2, holdcol2;
+ int wc;
holdcur1 = holdcur2 = tcur = 0;
holdcol1 = holdcol2 = tcol = 0;
@@ -1802,11 +1904,15 @@ rewindow(void)
holdcur2 = tcur;
holdcol2 = tcol;
}
- tcol = newcol((unsigned char) es->cbuf[tcur++], tcol);
+ wc = u8code(&es->cbuf[tcur]);
+ tcol = newcol(wc, tcol);
+ tcur += u8mblen(es->cbuf[tcur]);
+ }
+ while (tcol - holdcol1 > winwidth / 2) {
+ wc = u8code(&es->cbuf[holdcur1]);
+ holdcol1 = newcol(wc, holdcol1);
+ holdcur1 += u8mblen(es->cbuf[holdcur1]);
}
- while (tcol - holdcol1 > winwidth / 2)
- holdcol1 = newcol((unsigned char) es->cbuf[holdcur1++],
- holdcol1);
es->winleft = holdcur1;
}
@@ -1814,11 +1920,13 @@ rewindow(void)
static int
newcol(int ch, int col)
{
if (ch == '\t')
return (col | 7) + 1;
- if (isu8cont(ch))
- return col;
- return col + char_len(ch);
+	/* ASCII is still measured with char_len(), as before this change. */
+	if (ch < 0x80)
+		return col + char_len(ch);
+	/* Anything larger is a decoded code point; ask u8width(). */
+	return col + u8width(ch);
}
/* Display wb1 assuming that wb2 is currently displayed. */
@@ -1827,7 +1929,6 @@ display(char *wb1, char *wb2, int leftsi
{
char *twb1; /* pointer into the buffer to display */
char *twb2; /* pointer into the previous display buffer */
- static int lastb = -1; /* last byte# written from wb1, if UTF-8 */
int cur; /* byte# in the main command line buffer */
int col; /* display column loop variable */
int ncol; /* display column of the cursor */
@@ -1835,6 +1936,9 @@ display(char *wb1, char *wb2, int leftsi
int moreright;
char mc; /* new "more character" at the right of window */
unsigned char ch;
+ int wc1, wc2, w1col, w2col;
+ int i, len, wc, u8w;
+
/*
* Fill the current display buffer with data from cbuf.
@@ -1846,6 +1950,7 @@ display(char *wb1, char *wb2, int leftsi
moreright = 0;
twb1 = wb1;
while (col < winwidth && cur < es->linelen) {
+ u8w = 1;
if (cur == es->cursor && leftside)
ncol = col + pwidth;
if ((ch = es->cbuf[cur]) == '\t') {
@@ -1869,15 +1974,21 @@ display(char *wb1, char *wb2, int leftsi
col++;
}
} else {
- *twb1++ = ch;
- if (!isu8cont(ch))
- col++;
+ len = u8mblen(ch);
+ for (i = 0; i < len; i++)
+ *twb1++ = es->cbuf[cur + i];
+ if (len > 1)
+ wc = u8code(&es->cbuf[cur]);
+ else
+ wc = ch;
+ u8w = u8width(wc);
+ col += u8w;
}
}
}
if (cur == es->cursor && !leftside)
- ncol = col + pwidth - 1;
- cur++;
+ ncol = col + pwidth - u8w;
+ cur += u8mblen(ch);
}
if (cur == es->cursor)
ncol = col + pwidth;
@@ -1900,24 +2011,13 @@ display(char *wb1, char *wb2, int leftsi
col = pwidth;
cnt = winwidth;
- for (twb1 = wb1, twb2 = wb2; cnt; twb1++, twb2++) {
- if (*twb1 != *twb2) {
-
- /*
- * When a byte changes in the middle of a UTF-8
- * character, back up to the start byte, unless
- * the previous byte was the last one written.
- */
-
- if (col > 0 && isu8cont(*twb1)) {
- col--;
- if (lastb >= 0 && twb1 == wb1 + lastb + 1)
- cur_col = col;
- else while (twb1 > wb1 && isu8cont(*twb1)) {
- twb1--;
- twb2--;
- }
- }
+ w1col = 0;
+ w2col = 0;
+ for (twb1 = wb1, twb2 = wb2; cnt > 0;) {
+ wc1 = u8code(twb1);
+ wc2 = u8code(twb2);
+ u8w = u8width(wc1);
+ if (wc1 != wc2 || w1col != w2col) {
if (cur_col != col)
ed_mov_opt(col, wb1);
@@ -1926,16 +2026,11 @@ display(char *wb1, char *wb2, int leftsi
* Always write complete characters, and
* advance all pointers accordingly.
*/
-
- x_putc(*twb1);
- while (isu8cont(twb1[1])) {
- x_putc(*++twb1);
- twb2++;
- }
- lastb = *twb1 & 0x80 ? twb1 - wb1 : -1;
- cur_col++;
- } else if (isu8cont(*twb1))
- continue;
+ cur_col += u8w;
+ len = u8mblen(*twb1);
+ for (i = 0; i < len; i++)
+ x_putc(twb1[i]);
+ }
/*
* For changed continuation bytes, we backed up.
@@ -1943,8 +2038,16 @@ display(char *wb1, char *wb2, int leftsi
* So, getting here, we had a real column.
*/
- col++;
- cnt--;
+ col += u8w;
+ cnt -= u8w;
+ w1col += u8w;
+
+ twb1 += u8mblen(*twb1);
+ while (w1col >= w2col + u8width(wc2)) {
+ w2col += u8width(wc2);
+ twb2 += u8mblen(*twb2);
+ wc2 = u8code(twb2);
+ }
}
/* Update the "more character". */
@@ -1965,15 +2068,12 @@ display(char *wb1, char *wb2, int leftsi
x_putc(mc);
cur_col++;
morec = mc;
- lastb = -1;
}
/* Move the cursor to its new position. */
- if (cur_col != ncol) {
+ if (cur_col != ncol)
ed_mov_opt(ncol, wb1);
- lastb = -1;
- }
}
/* Move the display cursor to display column number col. */
@@ -1981,6 +2081,7 @@ static void
ed_mov_opt(int col, char *wb)
{
int ci;
+ int wc;
/* The cursor is already at the right place. */
@@ -2006,10 +2107,14 @@ ed_mov_opt(int col, char *wb)
/* Advance the cursor. */
- for (ci = pwidth; ci < col || isu8cont(*wb);
- ci = newcol((unsigned char)*wb++, ci))
+ for (ci = pwidth; ci < col || isu8cont(*wb); wb++) {
if (ci > cur_col || (ci == cur_col && !isu8cont(*wb)))
x_putc(*wb);
+ if (!isu8cont(*wb)) {
+ wc = u8code(wb);
+ ci = newcol(wc, ci);
+ }
+ }
cur_col = ci;
}
@@ -2257,5 +2362,70 @@ static int
isu8cont(unsigned char c)
{
return !Flag(FVISHOW8) && (c & (0x80 | 0x40)) == 0x80;
+}
+
+static int
+u8code(const char *str)
+{
+	/*
+	 * Combine the UTF-8 bytes at str into one code point.  The lead
+	 * byte alone decides how many continuation bytes are folded in;
+	 * those bytes are not validated.  With FVISHOW8 set, or for any
+	 * byte that is not a valid lead byte, the byte itself is returned.
+	 */
+	const unsigned char *p = (const unsigned char *)str;
+	int wc, more;
+
+	if (Flag(FVISHOW8))
+		return p[0];
+
+	if (p[0] >= 0xf0 && p[0] <= 0xf4) {
+		wc = p[0] & 0x07;
+		more = 3;
+	} else if (p[0] >= 0xe0 && p[0] <= 0xef) {
+		wc = p[0] & 0x0f;
+		more = 2;
+	} else if (p[0] >= 0xc2 && p[0] <= 0xdf) {
+		wc = p[0] & 0x1f;
+		more = 1;
+	} else
+		return p[0];
+
+	/* Each continuation byte contributes its low six bits. */
+	while (more-- > 0)
+		wc = (wc << 6) | (*++p & 0x3f);
+
+	return wc;
+}
+
+static int
+u8mblen(unsigned char c)
+{
+	/*
+	 * Number of bytes in the character introduced by c: always 1 when
+	 * FVISHOW8 is set or when c is not a valid UTF-8 lead byte.
+	 */
+	if (Flag(FVISHOW8))
+		return 1;
+
+	if (c >= 0xf0 && c <= 0xf4)
+		return 4;
+	if (c >= 0xe0 && c <= 0xef)
+		return 3;
+	if (c >= 0xc2 && c <= 0xdf)
+		return 2;
+
+	return 1;
+}
+
+static int
+u8width(int wc)
+{
+	/*
+	 * Columns needed to display wc.  When wcwidth() returns -1 the
+	 * character is counted as one column.
+	 */
+	int w = wcwidth(wc);
+	return w == -1 ? 1 : w;
}
#endif /* VI */