I'm posting an updated diff. It is only a fix for cursor movement; it is still experimental, and the obvious problems are not fixed yet. This is just a diff taken from the latest commit on GitHub:
Index: basic.c =================================================================== RCS file: /cvs/src/usr.bin/mg/basic.c,v retrieving revision 1.47 diff -u -p -u -p -r1.47 basic.c --- basic.c 10 Oct 2015 09:13:14 -0000 1.47 +++ basic.c 30 May 2018 13:34:00 -0000 @@ -18,6 +18,7 @@ #include <signal.h> #include <stdio.h> #include <stdlib.h> +#include <wchar.h> #include "def.h" @@ -64,6 +65,33 @@ backchar(int f, int n) curwp->w_dotline--; } else curwp->w_doto--; + + /* + * If we're on a multibyte character, and not in + * raw mode, we should move all the way through it. + */ + if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t offset = 0; + size_t consumed = 0; + struct line *clp = curwp->w_dotp; + while (offset <= curwp->w_doto) { + size_t s = curwp->w_doto - offset; + size_t len = llength(clp) - s; + consumed = mbrtowc(&wc, &clp->l_text[s], + len, &mbs); + if (consumed < (size_t) -2) { + curwp->w_doto -= offset; + break; + } + mbs = (mbstate_t) { 0 }; + offset++; + if (offset > MB_LEN_MAX) { + break; + } + } + } } return (TRUE); } @@ -108,8 +136,27 @@ forwchar(int f, int n) curwp->w_doto = 0; curwp->w_dotline++; curwp->w_rflag |= WFMOVE; - } else + } else if (!(curbp->b_flag & BFSHOWRAW)) { + /* + * If we're on a multibyte character, and + * not in raw mode, we should move all the + * way through it. 
+ */ + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t s = curwp->w_doto; + struct line *clp = curwp->w_dotp; + size_t len = llength(clp) - s; + size_t consumed = mbrtowc(&wc, &clp->l_text[s], + len, &mbs); + if (consumed && consumed < (size_t) -2) { + curwp->w_doto += consumed; + } else { + curwp->w_doto++; + } + } else { curwp->w_doto++; + } } return (TRUE); } @@ -269,12 +316,25 @@ setgoal(void) int getgoal(struct line *dlp) { - int c, i, col = 0; - char tmp[5]; + return getbyteofcol(dlp, 0, curgoal); +} +/* + * Return the byte offset within lp that is targetcol columns beyond + * startbyte + */ +size_t +getbyteofcol(const struct line *lp, const size_t startbyte, + const size_t targetcol) +{ + int c; + size_t i, col = 0; + char tmp[5]; + size_t advance_by = 1; - for (i = 0; i < llength(dlp); i++) { - c = lgetc(dlp, i); + for (i = startbyte; i < llength(lp); i += advance_by) { + advance_by = 1; + c = lgetc(lp, i); if (c == '\t' #ifdef NOTAB && !(curbp->b_flag & BFNOTAB) @@ -282,18 +342,86 @@ getgoal(struct line *dlp) ) { col |= 0x07; col++; - } else if (ISCTRL(c) != FALSE) { + } else if (iscntrl(c)) { col += 2; - } else if (isprint(c)) + } else if (isprint(c)) { col++; - else { + } else if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &lp->l_text[i], + llength(lp) - i, &mbs); + int width = -1; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } + if (width >= 0) { + col += width; + advance_by = consumed; + } else { + col += snprintf(tmp, sizeof(tmp), "\\%o", c); + } + } else { col += snprintf(tmp, sizeof(tmp), "\\%o", c); } - if (col > curgoal) + if (col > targetcol) break; } return (i); } + +/* + * Return the column at which specified offset byte would appear, if + * this were part of a longer string printed by vtputs, starting at + * intial_col + */ +size_t +getcolofbyte(const struct line *lp, const size_t startbyte, + const size_t initial_col, const size_t targetoffset) +{ + int c; + size_t 
i, col = initial_col; + char tmp[5]; + size_t advance_by = 1; + + for (i = startbyte; i < llength(lp); i += advance_by) { + if (i >= targetoffset) + break; + advance_by = 1; + c = lgetc(lp, i); + if (c == '\t' +#ifdef NOTAB + && !(curbp->b_flag & BFNOTAB) +#endif + ) { + col |= 0x07; + col++; + } else if (iscntrl(c)) { + col += 2; + } else if (isprint(c)) { + col++; + } else if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &lp->l_text[i], + llength(lp) - i, &mbs); + int width = -1; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } + if (width >= 0) { + col += width; + advance_by = consumed; + } else { + col += snprintf(tmp, sizeof(tmp), "\\%o", c); + } + } else { + col += snprintf(tmp, sizeof(tmp), "\\%o", c); + } + } + return (col); +} + /* * Scroll forward by a specified number Index: chrdef.h =================================================================== RCS file: chrdef.h diff -N chrdef.h --- chrdef.h 25 Mar 2015 12:29:03 -0000 1.10 +++ /dev/null 1 Jan 1970 00:00:00 -0000 @@ -1,74 +0,0 @@ -/* $OpenBSD: chrdef.h,v 1.10 2015/03/25 12:29:03 bcallah Exp $ */ - -/* This file is in the public domain. */ - -/* - * sys/default/chardef.h: character set specific #defines for Mg 2a - * Warning: System specific ones exist - */ - -/* - * Casting should be at least as efficient as anding with 0xff, - * and won't have the size problems. - */ -#define CHARMASK(c) ((unsigned char) (c)) - -/* - * These flags, and the macros below them, - * make up a do-it-yourself set of "ctype" macros that - * understand the DEC multinational set, and let me ask - * a slightly different set of questions. - */ -#define _MG_W 0x01 /* Word. */ -#define _MG_U 0x02 /* Upper case letter. */ -#define _MG_L 0x04 /* Lower case letter. */ -#define _MG_C 0x08 /* Control. 
*/ -#define _MG_P 0x10 /* end of sentence punctuation */ -#define _MG_D 0x20 /* is decimal digit */ - -#define ISWORD(c) ((cinfo[CHARMASK(c)]&_MG_W)!=0) -#define ISCTRL(c) ((cinfo[CHARMASK(c)]&_MG_C)!=0) -#define ISUPPER(c) ((cinfo[CHARMASK(c)]&_MG_U)!=0) -#define ISLOWER(c) ((cinfo[CHARMASK(c)]&_MG_L)!=0) -#define ISEOSP(c) ((cinfo[CHARMASK(c)]&_MG_P)!=0) -#define ISDIGIT(c) ((cinfo[CHARMASK(c)]&_MG_D)!=0) -#define TOUPPER(c) ((c)-0x20) -#define TOLOWER(c) ((c)+0x20) - -/* - * Generally useful thing for chars - */ -#define CCHR(x) ((x) ^ 0x40) /* CCHR('?') == DEL */ - -#define K00 256 -#define K01 257 -#define K02 258 -#define K03 259 -#define K04 260 -#define K05 261 -#define K06 262 -#define K07 263 -#define K08 264 -#define K09 265 -#define K0A 266 -#define K0B 267 -#define K0C 268 -#define K0D 269 -#define K0E 270 -#define K0F 271 -#define K10 272 -#define K11 273 -#define K12 274 -#define K13 275 -#define K14 276 -#define K15 277 -#define K16 278 -#define K17 279 -#define K18 280 -#define K19 281 -#define K1A 282 -#define K1B 283 -#define K1C 284 -#define K1D 285 -#define K1E 286 -#define K1F 287 Index: cinfo.c =================================================================== RCS file: /cvs/src/usr.bin/mg/cinfo.c,v retrieving revision 1.18 diff -u -p -u -p -r1.18 cinfo.c --- cinfo.c 19 Mar 2015 21:22:15 -0000 1.18 +++ cinfo.c 30 May 2018 13:34:00 -0000 @@ -10,92 +10,25 @@ * ctype.h) don't let you ask. */ +#include <ctype.h> #include <sys/queue.h> #include <signal.h> #include <stdio.h> #include <string.h> +#include <wchar.h> +#include <wctype.h> #include "def.h" -/* - * This table, indexed by a character drawn - * from the 256 member character set, is used by my - * own character type macros to answer questions about the - * type of a character. It handles the full multinational - * character set, and lets me ask some questions that the - * standard "ctype" macros cannot ask. 
- */ -/* - * Due to incompatible behaviour between "standard" emacs and - * ctags word traversing, '_' character's value is changed on - * the fly in ctags mode, hence non-const. +/* Flag for treating '_' as word-like byte */ -char cinfo[256] = { - _MG_C, _MG_C, _MG_C, _MG_C, /* 0x0X */ - _MG_C, _MG_C, _MG_C, _MG_C, - _MG_C, _MG_C, _MG_C, _MG_C, - _MG_C, _MG_C, _MG_C, _MG_C, - _MG_C, _MG_C, _MG_C, _MG_C, /* 0x1X */ - _MG_C, _MG_C, _MG_C, _MG_C, - _MG_C, _MG_C, _MG_C, _MG_C, - _MG_C, _MG_C, _MG_C, _MG_C, - 0, _MG_P, 0, 0, /* 0x2X */ - _MG_W, _MG_W, 0, _MG_W, - 0, 0, 0, 0, - 0, 0, _MG_P, 0, - _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, /* 0x3X */ - _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, - _MG_D | _MG_W, _MG_D | _MG_W, 0, 0, - 0, 0, 0, _MG_P, - 0, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, /* 0x4X */ - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, /* 0x5X */ - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, 0, - 0, 0, 0, 0, - 0, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, /* 0x6X */ - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, /* 0x7X */ - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, 0, - 0, 0, 0, _MG_C, - 0, 0, 0, 0, /* 0x8X */ - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, /* 0x9X */ - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, /* 0xAX */ - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, /* 0xBX */ - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, 
_MG_U | _MG_W, /* 0xCX */ - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - 0, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, /* 0xDX */ - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, - _MG_U | _MG_W, _MG_U | _MG_W, 0, _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, /* 0xEX */ - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - 0, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, /* 0xFX */ - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, - _MG_L | _MG_W, _MG_L | _MG_W, 0, 0 -}; +static int underscoreisword; + +void +treatunderscoreasword(int i) +{ + underscoreisword = i; +} /* * Find the name of a keystroke. 
Needs to be changed to handle 8-bit printing @@ -109,7 +42,7 @@ getkeyname(char *cp, size_t len, int k) size_t copied; if (k < 0) - k = CHARMASK(k); /* sign extended char */ + k = (unsigned char)(k); /* sign extended char */ switch (k) { case CCHR('@'): np = "C-SPC"; @@ -130,10 +63,10 @@ getkeyname(char *cp, size_t len, int k) np = "DEL"; break; default: - if (k >= KFIRST && k <= KLAST && - (np = keystrings[k - KFIRST]) != NULL) + if (k == 256) { + np = NULL; break; - if (k > CCHR('?')) { + } else if (k > CCHR('?')) { *cp++ = '0'; *cp++ = ((k >> 6) & 7) + '0'; *cp++ = ((k >> 3) & 7) + '0'; @@ -144,8 +77,8 @@ getkeyname(char *cp, size_t len, int k) *cp++ = 'C'; *cp++ = '-'; k = CCHR(k); - if (ISUPPER(k)) - k = TOLOWER(k); + if (isupper(k)) + k = tolower(k); } *cp++ = k; *cp = '\0'; @@ -155,4 +88,32 @@ getkeyname(char *cp, size_t len, int k) if (copied >= len) copied = len - 1; return (cp + copied); +} + +/* + Returns non-zero iff s[k] is a character (or the start of a + multibyte character, we won't examine more than "len" bytes) + which we consider to be "inside a word". In practice, this means + alphanumerics, "$", "%", "'". We'd also like to sometimes include + "_", but that's rare enough that we'll handle it at the relevant + call site. 
+ */ +int +byteinword(const char *s, size_t k, size_t len) { + if (s[k] == '$' || s[k] == '%' || s[k] == '\'') { + return 1; + } else if (s[k] == '_') { + return underscoreisword; + } + + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &s[k], len, &mbs); + + if (consumed < (size_t) -2) { + return iswalnum(wc); + } + + /* If we're in byte-garbage: sure, whatever */ + return 1; } Index: cmode.c =================================================================== RCS file: /cvs/src/usr.bin/mg/cmode.c,v retrieving revision 1.16 diff -u -p -u -p -r1.16 cmode.c --- cmode.c 26 Sep 2015 21:51:58 -0000 1.16 +++ cmode.c 30 May 2018 13:34:00 -0000 @@ -14,6 +14,7 @@ #include <ctype.h> #include <signal.h> #include <stdio.h> +#include <wchar.h> #include "def.h" #include "funmap.h" @@ -419,10 +420,25 @@ findcolpos(const struct buffer *bp, cons ) { col |= 0x07; col++; - } else if (ISCTRL(c) != FALSE) + } else if (iscntrl(c) != FALSE) { col += 2; - else if (isprint(c)) { + } else if (isprint(c)) { col++; + } else if (!(bp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &lp->l_text[i], + llength(lp) - i, &mbs); + int width = -1; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } + if (width >= 0) { + col += width; + i += (consumed - 1); + } else { + col += snprintf(tmp, sizeof(tmp), "\\%o", c); + } } else { col += snprintf(tmp, sizeof(tmp), "\\%o", c); } Index: def.h =================================================================== RCS file: /cvs/src/usr.bin/mg/def.h,v retrieving revision 1.155 diff -u -p -u -p -r1.155 def.h --- def.h 14 Apr 2016 17:05:32 -0000 1.155 +++ def.h 30 May 2018 13:34:01 -0000 @@ -10,7 +10,10 @@ * per-terminal definitions are in special header files. 
*/ -#include "chrdef.h" +/* + * Generally useful thing for chars + */ +#define CCHR(x) ((x) ^ 0x40) /* CCHR('?') == DEL */ typedef int (*PF)(int, int); /* generally useful type */ @@ -148,7 +151,7 @@ struct line { */ #define lforw(lp) ((lp)->l_fp) #define lback(lp) ((lp)->l_bp) -#define lgetc(lp, n) (CHARMASK((lp)->l_text[(n)])) +#define lgetc(lp, n) ((unsigned char)((lp)->l_text[(n)])) #define lputc(lp, n, c) ((lp)->l_text[(n)]=(c)) #define llength(lp) ((lp)->l_used) #define ltext(lp) ((lp)->l_text) @@ -263,7 +266,7 @@ struct buffer { int b_marko; /* ditto for the "mark" */ short b_nmodes; /* number of non-fundamental modes */ char b_nwnd; /* Count of windows on buffer */ - char b_flag; /* Flags */ + short b_flag; /* Flags */ char b_fname[NFILEN]; /* File name */ char b_cwd[NFILEN]; /* working directory */ struct fileinfo b_fi; /* File attributes */ @@ -290,6 +293,7 @@ struct buffer { #define BFDIRTY 0x20 /* Buffer was modified elsewhere */ #define BFIGNDIRTY 0x40 /* Ignore modifications */ #define BFDIREDDEL 0x80 /* Dired has a deleted 'D' file */ +#define BFSHOWRAW 0x100 /* Show unprintable as octal */ /* * This structure holds information about recent actions for the Undo command. */ @@ -314,9 +318,6 @@ struct undo_rec { #define putpad(str, num) tputs(str, num, ttputc) -#define KFIRST K00 -#define KLAST K00 - /* * Prototypes. 
*/ @@ -387,7 +388,8 @@ void lchange(int); int linsert(int, int); int lnewline_at(struct line *, int); int lnewline(void); -int ldelete(RSIZE, int); +int ldeletebyte(RSIZE, int); +int ldeletechar(RSIZE, int); int ldelnewline(void); int lreplace(RSIZE, char *); char * linetostr(const struct line *); @@ -490,6 +492,7 @@ int digit_argument(int, int); int negative_argument(int, int); int selfinsert(int, int); int quote(int, int); +int insert_char(int, int); /* main.c */ int ctrlg(int, int); @@ -499,7 +502,9 @@ int quit(int, int); void panic(char *); /* cinfo.c */ +void treatunderscoreasword(int); char *getkeyname(char *, size_t, int); +int byteinword(const char *, size_t, size_t); /* basic.c */ int gotobol(int, int); @@ -512,6 +517,8 @@ int forwline(int, int); int backline(int, int); void setgoal(void); int getgoal(struct line *); +size_t getbyteofcol(const struct line *, size_t, size_t); +size_t getcolofbyte(const struct line *, size_t, size_t, size_t); int forwpage(int, int); int backpage(int, int); int forw1page(int, int); @@ -658,6 +665,7 @@ int notabmode(int, int); #endif /* NOTAB */ int overwrite_mode(int, int); int set_default_mode(int,int); +int show_raw_mode(int, int); #ifdef REGEX /* re_search.c X */ @@ -737,7 +745,6 @@ extern int doaudiblebell; extern int dovisiblebell; extern int dblspace; extern char cinfo[]; -extern char *keystrings[]; extern char pat[NPAT]; extern char prompt[]; Index: display.c =================================================================== RCS file: /cvs/src/usr.bin/mg/display.c,v retrieving revision 1.48 diff -u -p -u -p -r1.48 display.c --- display.c 6 Jul 2017 19:27:37 -0000 1.48 +++ display.c 30 May 2018 13:34:01 -0000 @@ -7,17 +7,19 @@ * redisplay system knows almost nothing about the editing * process; the editing functions do, however, set some * hints to eliminate a lot of the grinding. There is more - * that can be done; the "vtputc" interface is a real + * that can be done; the "vtputs" interface is a real * pig. 
*/ #include <sys/queue.h> #include <ctype.h> +#include <limits.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <term.h> +#include <wchar.h> #include "def.h" #include "kbd.h" @@ -52,11 +54,10 @@ struct score { }; void vtmove(int, int); -void vtputc(int); void vtpute(int); -int vtputs(const char *); +int vtputs(const char *, size_t, size_t); void vteeol(void); -void updext(int, int); +int updext(int, int); void modeline(struct mgwin *, int); void setscores(int, int); void traceback(int, int, int, int); @@ -216,8 +217,8 @@ vtresize(int force, int newrow, int newc } if (rowchanged || colchanged || first_run) { for (i = 0; i < 2 * (newrow - 1); i++) - TRYREALLOC(video[i].v_text, newcol); - TRYREALLOC(blanks.v_text, newcol); + TRYREALLOC(video[i].v_text, newcol * MB_LEN_MAX); + TRYREALLOC(blanks.v_text, newcol * MB_LEN_MAX); } nrow = newrow; @@ -260,7 +261,7 @@ vtinit(void) */ blanks.v_color = CTEXT; - for (i = 0; i < ncol; ++i) + for (i = 0; i < ncol * MB_LEN_MAX; ++i) blanks.v_text[i] = ' '; } @@ -287,7 +288,7 @@ vttidy(void) * Move the virtual cursor to an origin * 0 spot on the virtual display screen. I could * store the column as a character pointer to the spot - * on the line, which would make "vtputc" a little bit + * on the line, which would make "vtputs" a little bit * more efficient. No checking for errors. */ void @@ -298,88 +299,6 @@ vtmove(int row, int col) } /* - * Write a character to the virtual display, - * dealing with long lines and the display of unprintable - * things like control characters. Also expand tabs every 8 - * columns. This code only puts printing characters into - * the virtual display image. Special care must be taken when - * expanding tabs. On a screen whose width is not a multiple - * of 8, it is possible for the virtual cursor to hit the - * right margin before the next tab stop is reached. This - * makes the tab code loop if you are not careful. - * Three guesses how we found this. 
- */ -void -vtputc(int c) -{ - struct video *vp; - - c &= 0xff; - - vp = vscreen[vtrow]; - if (vtcol >= ncol) - vp->v_text[ncol - 1] = '$'; - else if (c == '\t' -#ifdef NOTAB - && !(curbp->b_flag & BFNOTAB) -#endif - ) { - do { - vtputc(' '); - } while (vtcol < ncol && (vtcol & 0x07) != 0); - } else if (ISCTRL(c)) { - vtputc('^'); - vtputc(CCHR(c)); - } else if (isprint(c)) - vp->v_text[vtcol++] = c; - else { - char bf[5]; - - snprintf(bf, sizeof(bf), "\\%o", c); - vtputs(bf); - } -} - -/* - * Put a character to the virtual screen in an extended line. If we are not - * yet on left edge, don't print it yet. Check for overflow on the right - * margin. - */ -void -vtpute(int c) -{ - struct video *vp; - - c &= 0xff; - - vp = vscreen[vtrow]; - if (vtcol >= ncol) - vp->v_text[ncol - 1] = '$'; - else if (c == '\t' -#ifdef NOTAB - && !(curbp->b_flag & BFNOTAB) -#endif - ) { - do { - vtpute(' '); - } while (((vtcol + lbound) & 0x07) != 0 && vtcol < ncol); - } else if (ISCTRL(c) != FALSE) { - vtpute('^'); - vtpute(CCHR(c)); - } else if (isprint(c)) { - if (vtcol >= 0) - vp->v_text[vtcol] = c; - ++vtcol; - } else { - char bf[5], *cp; - - snprintf(bf, sizeof(bf), "\\%o", c); - for (cp = bf; *cp != '\0'; cp++) - vtpute(*cp); - } -} - -/* * Erase from the end of the software cursor to the end of the line on which * the software cursor is located. The display routines will decide if a * hardware erase to end of line command should be used to display this. 
@@ -390,7 +309,7 @@ vteeol(void) struct video *vp; vp = vscreen[vtrow]; - while (vtcol < ncol) + while (vtcol < ncol * MB_LEN_MAX) vp->v_text[vtcol++] = ' '; } @@ -410,7 +329,7 @@ update(int modelinecolor) struct mgwin *wp; struct video *vp1; struct video *vp2; - int c, i, j; + int c, i; int hflag; int currow, curcol; int offs, size; @@ -485,8 +404,9 @@ update(int modelinecolor) vscreen[i]->v_color = CTEXT; vscreen[i]->v_flag |= (VFCHG | VFHBAD); vtmove(i, 0); - for (j = 0; j < llength(lp); ++j) - vtputc(lgetc(lp, j)); + if (llength(lp)) { + vtputs(lp->l_text, llength(lp), 0); + } vteeol(); } else if ((wp->w_rflag & (WFEDIT | WFFULL)) != 0) { hflag = TRUE; @@ -495,8 +415,10 @@ update(int modelinecolor) vscreen[i]->v_flag |= (VFCHG | VFHBAD); vtmove(i, 0); if (lp != wp->w_bufp->b_headp) { - for (j = 0; j < llength(lp); ++j) - vtputc(lgetc(lp, j)); + if (llength(lp)) { + vtputs(lp->l_text, llength(lp), + 0); + } lp = lforw(lp); } vteeol(); @@ -514,32 +436,53 @@ update(int modelinecolor) ++currow; lp = lforw(lp); } + curcol = 0; i = 0; while (i < curwp->w_doto) { - c = lgetc(lp, i++); + char tmp[5]; + c = lgetc(lp, i); if (c == '\t' #ifdef NOTAB && !(curbp->b_flag & BFNOTAB) #endif ) { - curcol |= 0x07; curcol++; - } else if (ISCTRL(c) != FALSE) + while ((curcol - lbound) & 0x07) { + curcol++; + } + } else if (iscntrl(c) != FALSE) curcol += 2; - else if (isprint(c)) + else if (isprint(c)) { curcol++; - else { - char bf[5]; - - snprintf(bf, sizeof(bf), "\\%o", c); - curcol += strlen(bf); + } else if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &lp->l_text[i], + llength(lp) - i, &mbs); + int width = -1; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } else { + memset(&mbs, 0, sizeof mbs); + } + if (width >= 0) { + curcol += width; + i += (consumed - 1); + } else { + snprintf(tmp, sizeof(tmp), "\\%o", c); + curcol += strlen(tmp); + } + } else { + snprintf(tmp, sizeof(tmp), "\\%o", c); + curcol += 
strlen(tmp); } + i++; } if (curcol >= ncol - 1) { /* extended line. */ /* flag we are extended and changed */ vscreen[currow]->v_flag |= VFEXT | VFCHG; - updext(currow, curcol); /* and output extended line */ + curcol = updext(currow, curcol); } else lbound = 0; /* not extended line */ @@ -558,8 +501,10 @@ update(int modelinecolor) if ((wp != curwp) || (lp != wp->w_dotp) || (curcol < ncol - 1)) { vtmove(i, 0); - for (j = 0; j < llength(lp); ++j) - vtputc(lgetc(lp, j)); + if (llength(lp)) { + vtputs(lp->l_text, llength(lp), + 0); + } vteeol(); /* this line no longer is extended */ vscreen[i]->v_flag &= ~VFEXT; @@ -661,39 +606,44 @@ ucopy(struct video *vvp, struct video *p pvp->v_hash = vvp->v_hash; pvp->v_cost = vvp->v_cost; pvp->v_color = vvp->v_color; - bcopy(vvp->v_text, pvp->v_text, ncol); + bcopy(vvp->v_text, pvp->v_text, ncol * MB_LEN_MAX); } /* - * updext: update the extended line which the cursor is currently on at a - * column greater than the terminal width. The line will be scrolled right or - * left to let the user see where the cursor is. + * updext: update the extended line which the cursor is currently on + * at a column greater than the terminal width. The line will be + * scrolled right or left to let the user see where the cursor + * is. curcol may need to be adjusted, depending on how wide + * characters and lbound interact, that adjusted position is returned. */ -void +int updext(int currow, int curcol) { - struct line *lp; /* pointer to current line */ - int j; /* index into line */ + struct line *lp = curwp->w_dotp; /* pointer to current line */ + size_t startbyte; + int bettercol = curcol; + size_t fullextent; if (ncol < 2) - return; + return curcol; /* - * calculate what column the left bound should be - * (force cursor into middle half of screen) + * calculate what column the left bound should be (force + * cursor into middle half of screen). 
Ensuring that it is at + * a tabstop allows update() to calculate curcol without + * wondering how tabstops are calculated before the first '$'. */ lbound = curcol - (curcol % (ncol >> 1)) - (ncol >> 2); + lbound = (lbound | 0x07) + 1; + vscreen[currow]->v_text[0] = '$'; + vtmove(currow, 1); + startbyte = getbyteofcol(lp, 0, lbound + 1); + fullextent = getbyteofcol(lp, startbyte, ncol + 1); + vtputs(lp->l_text + startbyte, fullextent - startbyte, 1); + vteeol(); - /* - * scan through the line outputing characters to the virtual screen - * once we reach the left edge - */ - vtmove(currow, -lbound); /* start scanning offscreen */ - lp = curwp->w_dotp; /* line to output */ - for (j = 0; j < llength(lp); ++j) /* until the end-of-line */ - vtpute(lgetc(lp, j)); - vteeol(); /* truncate the virtual line */ - vscreen[currow]->v_text[0] = '$'; /* and put a '$' in column 1 */ + bettercol = lbound + getcolofbyte(lp, startbyte, 1, curwp->w_doto); + return (bettercol); } /* @@ -708,12 +658,35 @@ updext(int currow, int curcol) void uline(int row, struct video *vvp, struct video *pvp) { - char *cp1; - char *cp2; - char *cp3; - char *cp4; - char *cp5; + char *cp1; /* Pointer to the start of dirty region */ + char *cp2; /* pvp's counterpart for cp1 */ + char *cp3; /* Pointer to end of dirty region */ + char *cp4; /* pvp's counterpart for cp3 */ + char *cp5; /* After this, within dirty region, all is ' ' */ + char *mbcounter; int nbflag; + int startcol; /* onscreen column matching cp1 */ + char *lastbyte; /* byte which handles last onscreen column */ + int seencols = 0; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + + lastbyte = vvp->v_text; + while (seencols < ncol && *lastbyte) { + size_t consumed = mbrtowc(&wc, lastbyte, + (vvp->v_text + ncol * MB_LEN_MAX - lastbyte), &mbs); + if (consumed < (size_t) -2) { + lastbyte += consumed; + seencols += wcwidth(wc); + } else { + lastbyte++; + seencols++; + memset(&mbs, 0, sizeof mbs); + } + } + if (lastbyte - vvp->v_text < ncol) { + lastbyte 
= &vvp->v_text[ncol]; + } if (vvp->v_color != pvp->v_color) { /* Wrong color, do a */ ttmove(row, 0); /* full redraw. */ @@ -729,11 +702,12 @@ uline(int row, struct video *vvp, struct * putting the invisible glitch character on the next line. * (Hazeltine executive 80 model 30) */ - cp2 = &vvp->v_text[ncol - (magic_cookie_glitch >= 0 ? - (magic_cookie_glitch != 0 ? magic_cookie_glitch : 1) : 0)]; + cp2 = lastbyte - + (magic_cookie_glitch >= 0 ? (magic_cookie_glitch != 0 ? + magic_cookie_glitch : 1) : 0); #else cp1 = &vvp->v_text[0]; - cp2 = &vvp->v_text[ncol]; + cp2 = lastbyte; #endif while (cp1 != cp2) { ttputc(*cp1++); @@ -744,21 +718,31 @@ uline(int row, struct video *vvp, struct } cp1 = &vvp->v_text[0]; /* Compute left match. */ cp2 = &pvp->v_text[0]; - while (cp1 != &vvp->v_text[ncol] && cp1[0] == cp2[0]) { + while (cp1 != lastbyte && cp1[0] == cp2[0]) { ++cp1; ++cp2; } - if (cp1 == &vvp->v_text[ncol]) /* All equal. */ + if (cp1 == lastbyte) /* All equal. */ return; + while (cp1 != vvp->v_text && !isprint(*cp1) && + mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) { + --cp1; + --cp2; + } nbflag = FALSE; - cp3 = &vvp->v_text[ncol]; /* Compute right match. */ - cp4 = &pvp->v_text[ncol]; + cp3 = lastbyte; /* Compute right match. */ + cp4 = &pvp->v_text[lastbyte - vvp->v_text]; while (cp3[-1] == cp4[-1]) { --cp3; --cp4; if (cp3[0] != ' ') /* Note non-blanks in */ nbflag = TRUE; /* the right match. */ } + while (cp3 != lastbyte && !isprint(*cp3) && + mbrtowc(&wc, cp3, (lastbyte - cp3), &mbs) >= (size_t) -2) { + ++cp3; + ++cp4; + } cp5 = cp3; /* Is erase good? 
*/ if (nbflag == FALSE && vvp->v_color == CTEXT) { while (cp5 != cp1 && cp5[-1] == ' ') @@ -768,13 +752,27 @@ uline(int row, struct video *vvp, struct cp5 = cp3; } /* Alcyon hack */ - ttmove(row, (int) (cp1 - &vvp->v_text[0])); + startcol = 0; + mbcounter = vvp->v_text; + while ((cp1 - mbcounter) > 0) { + size_t consumed = mbrtowc(&wc, mbcounter, (cp1 - mbcounter), + &mbs); + if (consumed < (size_t) -2) { + mbcounter += consumed; + startcol += wcwidth(wc); + } else { + mbcounter++; + startcol++; + memset(&mbs, 0, sizeof mbs); + } + } + ttmove(row, startcol); #ifdef STANDOUT_GLITCH if (vvp->v_color != CTEXT && magic_cookie_glitch > 0) { if (cp1 < &vvp->v_text[magic_cookie_glitch]) cp1 = &vvp->v_text[magic_cookie_glitch]; - if (cp5 > &vvp->v_text[ncol - magic_cookie_glitch]) - cp5 = &vvp->v_text[ncol - magic_cookie_glitch]; + if (cp5 > lastbyte - magic_cookie_glitch) + cp5 = lastbyte - magic_cookie_glitch; } else if (magic_cookie_glitch < 0) #endif ttcolor(vvp->v_color); @@ -807,46 +805,39 @@ modeline(struct mgwin *wp, int modelinec vscreen[n]->v_flag |= (VFCHG | VFHBAD); /* Recompute, display. */ vtmove(n, 0); /* Seek to right line. */ bp = wp->w_bufp; - vtputc('-'); - vtputc('-'); + n = vtputs("--", 0, 0); if ((bp->b_flag & BFREADONLY) != 0) { - vtputc('%'); + n += vtputs("%", 0, n); if ((bp->b_flag & BFCHG) != 0) - vtputc('*'); + n += vtputs("*", 0, n); else - vtputc('%'); + n += vtputs("%", 0, n); } else if ((bp->b_flag & BFCHG) != 0) { /* "*" if changed. */ - vtputc('*'); - vtputc('*'); + n += vtputs("**", 0, n); } else { - vtputc('-'); - vtputc('-'); + n += vtputs("--", 0, n); } - vtputc('-'); + n += vtputs("-", 0, n); n = 5; - n += vtputs("Mg: "); + n += vtputs("Mg: ", 0, n); if (bp->b_bname[0] != '\0') - n += vtputs(&(bp->b_bname[0])); + n += vtputs(&(bp->b_bname[0]), 0, n); while (n < 42) { /* Pad out with blanks. 
*/ - vtputc(' '); - ++n; + n += vtputs(" ", 0, n); } - vtputc('('); - ++n; + n += vtputs("(", 0, n); for (md = 0; ; ) { - n += vtputs(bp->b_modes[md]->p_name); + n += vtputs(bp->b_modes[md]->p_name, 0, n); if (++md > bp->b_nmodes) break; - vtputc('-'); - ++n; + n += vtputs("-", 0, n); } /* XXX These should eventually move to a real mode */ if (macrodef == TRUE) - n += vtputs("-def"); + n += vtputs("-def", 0, n); if (globalwd == TRUE) - n += vtputs("-gwd"); - vtputc(')'); - ++n; + n += vtputs("-gwd", 0, n); + n += vtputs(")", 0, n); if (linenos && colnos) len = snprintf(sl, sizeof(sl), "--L%d--C%d", wp->w_dotline, @@ -856,27 +847,132 @@ modeline(struct mgwin *wp, int modelinec else if (colnos) len = snprintf(sl, sizeof(sl), "--C%d", getcolpos(wp)); if ((linenos || colnos) && len < sizeof(sl) && len != -1) - n += vtputs(sl); + n += vtputs(sl, 0, n); while (n < ncol) { /* Pad out. */ - vtputc('-'); - ++n; + n += vtputs("-", 0, n); } } /* - * Output a string to the mode line, report how long it was. + * Output a string to the mode line, report how long it was, + * dealing with long lines and the display of unprintable + * things like control characters. Also expand tabs every 8 + * columns. This code only puts printing characters into + * the virtual display image. Special care must be taken when + * expanding tabs. On a screen whose width is not a multiple + * of 8, it is possible for the virtual cursor to hit the + * right margin before the next tab stop is reached. This + * makes the tab code loop if you are not careful. + * Three guesses how we found this. 
*/ int -vtputs(const char *s) +vtputs(const char *s, const size_t max_bytes, const size_t initial_col) { - int n = 0; + const unsigned char *us = (const unsigned char *) s; + struct video *vp = vscreen[vtrow]; + size_t bytes_handled = 0; + size_t last_full_byte_start = vtcol; + size_t space_printed = 0; + + if (!s) { + return (0); + } + + while (*us && (!max_bytes || bytes_handled < max_bytes)) { + if (space_printed + initial_col >= ncol) { + break; + } else if (*us == '\t' +#ifdef NOTAB + && !(curbp->b_flag & BFNOTAB) +#endif + ) { + last_full_byte_start = vtcol; + do { + if (vtcol >= 0) { + last_full_byte_start = vtcol; + vp->v_text[vtcol] = ' '; + } + vtcol++; + space_printed++; + } while (space_printed + initial_col < ncol && + ((space_printed + initial_col) & 0x07)); + us++; + bytes_handled++; + } else if (iscntrl(*us)) { + last_full_byte_start = vtcol; + if (vtcol >= 0) { + vp->v_text[vtcol] = '^'; + } + vtcol++; + if (vtcol >= 0) { + vp->v_text[vtcol] = CCHR(*us); + } + vtcol++; + bytes_handled++; + space_printed += 2; + us++; + } else if (isprint(*us)) { + last_full_byte_start = vtcol; + if (vtcol >= 0) { + vp->v_text[vtcol] = *us++; + } + vtcol++; + bytes_handled++; + space_printed++; + } else if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumable = max_bytes ? 
+ (max_bytes - bytes_handled) : -1; + size_t consumed = mbrtowc(&wc, (const char *)us, + consumable, &mbs); + int width = -1; + last_full_byte_start = vtcol; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } + if (width >= 0) { + bytes_handled += consumed; + space_printed += width; + do { + if (vtcol >= 0) { + vp->v_text[vtcol] = *us++; + } + vtcol++; + } while (--consumed); + } else { + char bf[5]; + snprintf(bf, sizeof(bf), "\\%o", *us); + bytes_handled++; + space_printed += vtputs(bf, 0, + space_printed + initial_col); + us++; + } + } else { + char bf[5]; + last_full_byte_start = vtcol; + snprintf(bf, sizeof(bf), "\\%o", *us); + bytes_handled++; + space_printed += vtputs(bf, 0, + space_printed + initial_col); + us++; + } + } - while (*s != '\0') { - vtputc(*s++); - ++n; + if ((space_printed + initial_col > ncol) || + (space_printed + initial_col == ncol && + (*us && (!max_bytes || bytes_handled < max_bytes)))) { + vp->v_text[last_full_byte_start] = '$'; + while (++last_full_byte_start <= vtcol) { + vp->v_text[last_full_byte_start] = ' '; + } + bytes_handled++; + space_printed++; + us++; } - return (n); + + return (space_printed); } /* @@ -894,11 +990,11 @@ hash(struct video *vp) char *s; if ((vp->v_flag & VFHBAD) != 0) { /* Hash bad. */ - s = &vp->v_text[ncol - 1]; - for (i = ncol; i != 0; --i, --s) + s = &vp->v_text[ncol * MB_LEN_MAX - 1]; + for (i = ncol * MB_LEN_MAX; i != 0; --i, --s) if (*s != ' ') break; - n = ncol - i; /* Erase cheaper? */ + n = ncol * MB_LEN_MAX - i; /* Erase cheaper? */ if (n > tceeol) n = tceeol; vp->v_cost = i + n; /* Bytes + blanks. */ Index: echo.c =================================================================== RCS file: /cvs/src/usr.bin/mg/echo.c,v retrieving revision 1.66 diff -u -p -u -p -r1.66 echo.c --- echo.c 24 Oct 2016 17:18:42 -0000 1.66 +++ echo.c 30 May 2018 13:34:01 -0000 @@ -9,6 +9,7 @@ * of the display screen. Used by the entire known universe. 
*/ +#include <ctype.h> #include <sys/queue.h> #include <signal.h> #include <stdarg.h> @@ -187,6 +188,7 @@ static char * veread(const char *fp, char *buf, size_t nbuf, int flag, va_list ap) { int dynbuf = (buf == NULL); + size_t buf_len; int cpos, epos; /* cursor, end position in buf */ int c, i, y; int cplflag; /* display completion list */ @@ -215,6 +217,11 @@ veread(const char *fp, char *buf, size_t epos = cpos = 0; ml = mr = esc = 0; cplflag = FALSE; + if (buf == NULL) { + buf_len = 0; + } else { + buf_len = strlen(buf); + } if ((flag & EFNEW) != 0 || ttrow != nrow - 1) { ttcolor(CTEXT); @@ -227,7 +234,7 @@ veread(const char *fp, char *buf, size_t if (buf == NULL) return (NULL); eputs(buf); - epos = cpos += strlen(buf); + epos = cpos += buf_len; } tteeol(); ttflush(); @@ -275,7 +282,7 @@ veread(const char *fp, char *buf, size_t switch (c) { case CCHR('A'): /* start of line */ while (cpos > 0) { - if (ISCTRL(buf[--cpos]) != FALSE) { + if (iscntrl(buf[--cpos]) != FALSE) { ttputc('\b'); --ttcol; } @@ -306,7 +313,7 @@ veread(const char *fp, char *buf, size_t break; case CCHR('B'): /* back */ if (cpos > 0) { - if (ISCTRL(buf[--cpos]) != FALSE) { + if (iscntrl(buf[--cpos]) != FALSE) { ttputc('\b'); --ttcol; } @@ -411,7 +418,7 @@ veread(const char *fp, char *buf, size_t epos--; ttputc('\b'); ttcol--; - if (ISCTRL(y) != FALSE) { + if (iscntrl(y) != FALSE) { ttputc('\b'); ttcol--; } @@ -422,7 +429,7 @@ veread(const char *fp, char *buf, size_t eputc(buf[i]); } ttputc(' '); - if (ISCTRL(y) != FALSE) { + if (iscntrl(y) != FALSE) { ttputc(' '); ttputc('\b'); } @@ -438,7 +445,7 @@ veread(const char *fp, char *buf, size_t ttputc(' '); ttputc('\b'); --ttcol; - if (ISCTRL(buf[--cpos]) != FALSE) { + if (iscntrl(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); @@ -449,12 +456,12 @@ veread(const char *fp, char *buf, size_t ttflush(); break; case CCHR('W'): /* kill to beginning of word */ - while ((cpos > 0) && !ISWORD(buf[cpos - 1])) { + while ((cpos > 0) && 
!byteinword(buf, cpos - 1, buf_len - cpos + 1)) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; - if (ISCTRL(buf[--cpos]) != FALSE) { + if (iscntrl(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); @@ -462,12 +469,12 @@ veread(const char *fp, char *buf, size_t } epos--; } - while ((cpos > 0) && ISWORD(buf[cpos - 1])) { + while ((cpos > 0) && byteinword(buf, cpos - 1, buf_len - cpos + 1)) { ttputc('\b'); ttputc(' '); ttputc('\b'); --ttcol; - if (ISCTRL(buf[--cpos]) != FALSE) { + if (iscntrl(buf[--cpos]) != FALSE) { ttputc('\b'); ttputc(' '); ttputc('\b'); @@ -797,6 +804,7 @@ int getxtra(struct list *lp1, struct list *lp2, int cpos, int wflag) { int i; + size_t name_len = strlen(lp1->l_name); i = cpos; for (;;) { @@ -805,7 +813,7 @@ getxtra(struct list *lp1, struct list *l if (lp1->l_name[i] == '\0') break; ++i; - if (wflag && !ISWORD(lp1->l_name[i - 1])) + if (wflag && !byteinword(lp1->l_name, i - 1, name_len - i + 1)) break; } return (i - cpos); @@ -844,9 +852,11 @@ ewprintf(const char *fmt, ...) * %k prints the name of the current key (and takes no arguments). 
* %d prints a decimal integer * %o prints an octal integer + * %x prints a hexadecimal integer * %p prints a pointer * %s prints a string * %ld prints a long word + * %lx prints a hexadecimal long word * Anything else is echoed verbatim */ static void @@ -885,6 +895,10 @@ eformat(const char *fp, va_list ap) eputi(va_arg(ap, int), 8); break; + case 'x': + eputi(va_arg(ap, int), 16); + break; + case 'p': snprintf(tmp, sizeof(tmp), "%p", va_arg(ap, void *)); @@ -902,6 +916,9 @@ eformat(const char *fp, va_list ap) case 'd': eputl(va_arg(ap, long), 10); break; + case 'x': + eputl(va_arg(ap, long), 16); + break; default: eputc(c); break; @@ -939,6 +956,7 @@ static void eputl(long l, int r) { long q; + int c; if (l < 0) { eputc('-'); @@ -946,7 +964,10 @@ eputl(long l, int r) } if ((q = l / r) != 0) eputl(q, r); - eputc((int)(l % r) + '0'); + c = (int)(l % r) + '0'; + if (c > '9') + c += 'a' - '9' - 1; + eputc(c); } /* @@ -969,7 +990,7 @@ static void eputc(char c) { if (ttcol + 2 < ncol) { - if (ISCTRL(c)) { + if (iscntrl(c)) { eputc('^'); c = CCHR(c); } Index: extend.c =================================================================== RCS file: /cvs/src/usr.bin/mg/extend.c,v retrieving revision 1.64 diff -u -p -u -p -r1.64 extend.c --- extend.c 1 Sep 2016 21:06:09 -0000 1.64 +++ extend.c 30 May 2018 13:34:01 -0000 @@ -15,7 +15,6 @@ #include <stdlib.h> #include <string.h> -#include "chrdef.h" #include "def.h" #include "funmap.h" #include "kbd.h" @@ -349,7 +348,7 @@ dobind(KEYMAP *curmap, const char *p, in } if (inmacro) { for (s = 0; s < maclcur->l_used - 1; s++) { - if (doscan(curmap, c = CHARMASK(maclcur->l_text[s]), &curmap) + if (doscan(curmap, c = (unsigned char)(maclcur->l_text[s]), &curmap) != NULL) { if (remap(curmap, c, NULL, NULL) != TRUE) @@ -726,7 +725,7 @@ excline(char *line) if (*line != '\0') { *line++ = '\0'; line = skipwhite(line); - if (ISDIGIT(*line) || *line == '-') { + if (isdigit(*line) || *line == '-') { argp = line; line = parsetoken(line); } @@ 
-815,9 +814,9 @@ excline(char *line) * split into two statements * due to bug in OSK cpp */ - c = CHARMASK(*++argp); - c = ISLOWER(c) ? - CCHR(TOUPPER(c)) : CCHR(c); + c = (unsigned char)(*++argp); + c = islower(c) ? + CCHR(toupper(c)) : CCHR(c); break; case '0': case '1': @@ -843,14 +842,14 @@ excline(char *line) case 'f': case 'F': c = *++argp - '0'; - if (ISDIGIT(argp[1])) { + if (isdigit(argp[1])) { c *= 10; c += *++argp - '0'; } - c += KFIRST; + c += 256; break; default: - c = CHARMASK(*argp); + c = (unsigned char)(*argp); break; } argp++; Index: funmap.c =================================================================== RCS file: /cvs/src/usr.bin/mg/funmap.c,v retrieving revision 1.53 diff -u -p -u -p -r1.53 funmap.c --- funmap.c 14 Apr 2016 17:05:32 -0000 1.53 +++ funmap.c 30 May 2018 13:34:01 -0000 @@ -114,6 +114,7 @@ static struct funmap functnames[] = { {bufferinsert, "insert-buffer",}, {fileinsert, "insert-file",}, {fillword, "insert-with-wrap",}, + {insert_char, "insert-char",}, {backisearch, "isearch-backward",}, {forwisearch, "isearch-forward",}, {joinline, "join-line",}, @@ -191,6 +192,7 @@ static struct funmap functnames[] = { {shellcommand, "shell-command",}, {piperegion, "shell-command-on-region",}, {shrinkwind, "shrink-window",}, + {show_raw_mode, "show-raw-mode",}, #ifdef NOTAB {space_to_tabstop, "space-to-tabstop",}, #endif /* NOTAB */ Index: help.c =================================================================== RCS file: /cvs/src/usr.bin/mg/help.c,v retrieving revision 1.35 diff -u -p -u -p -r1.35 help.c --- help.c 19 Mar 2015 21:22:15 -0000 1.35 +++ help.c 30 May 2018 13:34:01 -0000 @@ -6,6 +6,7 @@ * Help functions for Mg 2 */ +#include <ctype.h> #include <sys/queue.h> #include <signal.h> #include <stdio.h> @@ -57,9 +58,9 @@ desckey(int f, int n) } if (funct != rescan) break; - if (ISUPPER(key.k_chars[key.k_count - 1])) { + if (isupper(key.k_chars[key.k_count - 1])) { funct = doscan(curmap, - TOLOWER(key.k_chars[key.k_count - 1]), 
&curmap); + tolower(key.k_chars[key.k_count - 1]), &curmap); if (funct == NULL) { *pep++ = '-'; *pep = '\0'; Index: kbd.c =================================================================== RCS file: /cvs/src/usr.bin/mg/kbd.c,v retrieving revision 1.30 diff -u -p -u -p -r1.30 kbd.c --- kbd.c 26 Sep 2015 21:51:58 -0000 1.30 +++ kbd.c 30 May 2018 13:34:01 -0000 @@ -6,9 +6,13 @@ * Terminal independent keyboard handling. */ +#include <ctype.h> +#include <limits.h> #include <sys/queue.h> #include <signal.h> #include <stdio.h> +#include <stdlib.h> +#include <wchar.h> #include "def.h" #include "kbd.h" @@ -168,8 +172,8 @@ rescan(int f, int n) int md = curbp->b_nmodes; for (;;) { - if (ISUPPER(key.k_chars[key.k_count - 1])) { - c = TOLOWER(key.k_chars[key.k_count - 1]); + if (isupper(key.k_chars[key.k_count - 1])) { + c = tolower(key.k_chars[key.k_count - 1]); curmap = curbp->b_modes[md]->p_map; for (i = 0; i < key.k_count - 1; i++) { if ((fp = doscan(curmap, (key.k_chars[i]), @@ -403,6 +407,43 @@ quote(int f, int n) ungetkey(c); } return (selfinsert(f, n)); +} + +/* + * Prompt for a codepoint in whatever the native system's encoding is, + * insert it into the file + */ +int +insert_char(int f, int n) +{ + char *bufp; + char inpbuf[32]; + wchar_t wc; + char mb[MB_LEN_MAX + 1]; + mbstate_t mbs = { 0 }; + size_t mbslen; + size_t i; + + if ((bufp = eread("Insert character (hex): ", inpbuf, sizeof inpbuf, + EFNEW)) == NULL) { + return (ABORT); + } else if (bufp[0] == '\0') { + return (FALSE); + } + + wc = (wchar_t) strtoll(bufp, NULL, 16); + mbslen = wcrtomb(mb, wc, &mbs); + if (mbslen == (size_t) -1) { + return (FALSE); + } + + for (i = 0; i < mbslen; ++i) { + if (linsert(1, mb[i]) == FALSE) { + return (FALSE); + } + } + + return (TRUE); } /* Index: keymap.c =================================================================== RCS file: /cvs/src/usr.bin/mg/keymap.c,v retrieving revision 1.58 diff -u -p -u -p -r1.58 keymap.c --- keymap.c 29 Dec 2015 19:44:32 -0000 1.58 +++ 
keymap.c 30 May 2018 13:34:01 -0000 @@ -120,6 +120,21 @@ static struct KEYMAPE (2) cX4map = { } }; +static PF cX8J[] = { + insert_char /* ^M */ +}; + +static struct KEYMAPE (1) cX8map = { + 1, + 1, + rescan, + { + { + CCHR('M'), CCHR('M'), cX8J, NULL + } + } +}; + static PF cXcB[] = { listbuffers, /* ^B */ quit, /* ^C */ @@ -158,6 +173,10 @@ static PF cX0[] = { NULL /* 4 */ }; +static PF cX8[] = { + NULL /* 4 */ +}; + static PF cXeq[] = { showcpos /* = */ }; @@ -189,9 +208,9 @@ static PF cXcar[] = { undo /* u */ }; -struct KEYMAPE (6) cXmap = { - 6, - 6, +struct KEYMAPE (7) cXmap = { + 7, + 7, rescan, { { @@ -207,6 +226,9 @@ struct KEYMAPE (6) cXmap = { '0', '4', cX0, (KEYMAP *) & cX4map }, { + '8', '8', cX8, (KEYMAP *) & cX8map + }, + { '=', '=', cXeq, NULL }, { @@ -491,6 +513,18 @@ static struct KEYMAPE (1) overwmap = { } }; +static struct KEYMAPE (1) rawmap = { + 0, + 1, /* 1 to avoid 0 sized array */ + rescan, + { + /* unused dummy entry for VMS C */ + { + (KCHAR)0, (KCHAR)0, NULL, NULL + } + } +}; + /* * The basic (root) keyboard map @@ -513,6 +547,7 @@ static struct maps_s map_table[] = { {(KEYMAP *) &notabmap, "notab",}, #endif /* NOTAB */ {(KEYMAP *) &overwmap, "overwrite",}, + {(KEYMAP *) &rawmap, "raw",}, {(KEYMAP *) &metamap, "esc prefix",}, {(KEYMAP *) &cXmap, "c-x prefix",}, {(KEYMAP *) &cX4map, "c-x 4 prefix",}, Index: line.c =================================================================== RCS file: /cvs/src/usr.bin/mg/line.c,v retrieving revision 1.59 diff -u -p -u -p -r1.59 line.c --- line.c 9 Sep 2017 13:10:28 -0000 1.59 +++ line.c 30 May 2018 13:34:01 -0000 @@ -23,6 +23,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> #include "def.h" @@ -327,15 +328,15 @@ lnewline(void) } /* - * This function deletes "n" bytes, starting at dot. (actually, n+1, as the - * newline is included) It understands how to deal with end of lines, etc.
- * It returns TRUE if all of the characters were deleted, and FALSE if - * they were not (because dot ran into the end of the buffer). - * The "kflag" indicates either no insertion, or direction of insertion - * into the kill buffer. + * This function deletes "n" bytes, starting at dot. (actually, + * n+1, as the newline is included) It understands how to deal with + * end of lines, etc. It returns TRUE if all of the characters + * were deleted, and FALSE if they were not (because dot ran into + * the end of the buffer). The "kflag" indicates either no insertion, + * or direction of insertion into the kill buffer. */ int -ldelete(RSIZE n, int kflag) +ldeletebyte(RSIZE n, int kflag) { struct line *dotp; RSIZE chunk; @@ -355,6 +356,7 @@ ldelete(RSIZE n, int kflag) ewprintf("Buffer is read only"); goto out; } + len = n; if ((sv = calloc(1, len + 1)) == NULL) goto out; @@ -417,6 +419,50 @@ out: } /* + * This function is like ldeletebyte, but it deletes "n" characters. + */ +int +ldeletechar(RSIZE n, int kflag) +{ + struct line *dotp; + int doto; + + /* + * If we're in a wide context, the caller meant n *characters*, + * but we prefer to work in bytes, so let's expand out n. + */ + if (!(curbp->b_flag & BFSHOWRAW)) { + size_t new_n = 0; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = 0; + dotp = curwp->w_dotp; + doto = curwp->w_doto; + for (size_t j = 0; j < n; ++j) { + if (doto == llength(dotp)) { + doto = 0; + dotp = lforw(dotp); + new_n++; + } else { + size_t len = llength(dotp) - doto; + consumed = mbrtowc(&wc, + &dotp->l_text[doto], + len, &mbs); + if (consumed < (size_t) -2) { + new_n += consumed; + doto += consumed; + } else { + new_n++; + doto++; + } + } + } + return ldeletebyte(new_n, kflag); + } + return ldeletebyte(n, kflag); +} + +/* * Delete a newline and join the current line with the next line. 
If the next * line is the magic header line always return TRUE; merging the last line * with the header line can be thought of as always being a successful @@ -523,7 +569,7 @@ lreplace(RSIZE plen, char *st) undo_boundary_enable(FFRAND, 0); (void)backchar(FFARG | FFRAND, (int)plen); - (void)ldelete(plen, KNONE); + (void)ldeletechar(plen, KNONE); rlen = strlen(st); region_put_data(st, rlen); Index: match.c =================================================================== RCS file: /cvs/src/usr.bin/mg/match.c,v retrieving revision 1.19 diff -u -p -u -p -r1.19 match.c --- match.c 3 Jun 2015 23:40:01 -0000 1.19 +++ match.c 30 May 2018 13:34:01 -0000 @@ -10,6 +10,7 @@ * but there's enough overhead in the editor as it is. */ +#include <ctype.h> #include <sys/queue.h> #include <signal.h> #include <stdio.h> @@ -172,7 +173,7 @@ displaymatch(struct line *clp, int cbo) || (curbp->b_flag & BFNOTAB) #endif ) - if (ISCTRL(c)) { + if (iscntrl(c)) { buf[bufo++] = '^'; buf[bufo++] = CCHR(c); } else Index: mg.1 =================================================================== RCS file: /cvs/src/usr.bin/mg/mg.1,v retrieving revision 1.106 diff -u -p -u -p -r1.106 mg.1 --- mg.1 11 Dec 2017 07:27:07 -0000 1.106 +++ mg.1 30 May 2018 13:34:01 -0000 @@ -1,7 +1,7 @@ .\" $OpenBSD: mg.1,v 1.106 2017/12/11 07:27:07 jmc Exp $ .\" This file is in the public domain. .\" -.Dd $Mdocdate: December 11 2017 $ +.Dd $Mdocdate: May 28 2018 $ .Dt MG 1 .Os .Sh NAME @@ -849,7 +849,7 @@ This is the default. .It set-default-mode Append the supplied mode to the list of default modes used by subsequent buffer creation. -Built in modes include: fill, indent and overwrite. +Built in modes include: fill, indent, overwrite, and raw. .It set-fill-column Prompt the user for a fill column. Used by auto-fill-mode. @@ -861,6 +861,8 @@ Sets the prefix string to be used by the Execute external command from mini-buffer. .It shell-command-on-region Provide the text in region to the shell command as input. 
+.It show-raw-mode +ASCII encoding mode. .It shrink-window Shrink current window by one line. The window immediately below is expanded to pick up the slack. @@ -1091,5 +1093,3 @@ In order to use 8-bit characters (such a needs to be disabled via the .Dq meta-key-mode command. -.Pp -Multi-byte character sets, such as UTF-8, are not supported. Index: modes.c =================================================================== RCS file: /cvs/src/usr.bin/mg/modes.c,v retrieving revision 1.21 diff -u -p -u -p -r1.21 modes.c --- modes.c 30 May 2017 07:05:22 -0000 1.21 +++ modes.c 30 May 2018 13:34:01 -0000 @@ -111,6 +111,23 @@ overwrite_mode(int f, int n) } int +show_raw_mode(int f, int n) +{ + if (changemode(f, n, "raw") == FALSE) + return (FALSE); + if (f & FFARG) { + if (n <= 0) + curbp->b_flag &= ~BFSHOWRAW; + else + curbp->b_flag |= BFSHOWRAW; + } else + curbp->b_flag ^= BFSHOWRAW; + + sgarbf = TRUE; + return (TRUE); +} + +int set_default_mode(int f, int n) { int i; @@ -170,5 +187,11 @@ set_default_mode(int f, int n) defb_flag |= BFNOTAB; } #endif /* NOTAB */ + if (strcmp(modebuf, "raw") == 0) { + if (n <= 0) + defb_flag &= ~BFSHOWRAW; + else + defb_flag |= BFSHOWRAW; + } return (TRUE); } Index: paragraph.c =================================================================== RCS file: /cvs/src/usr.bin/mg/paragraph.c,v retrieving revision 1.45 diff -u -p -u -p -r1.45 paragraph.c --- paragraph.c 6 Sep 2016 16:25:47 -0000 1.45 +++ paragraph.c 30 May 2018 13:34:01 -0000 @@ -24,6 +24,16 @@ static int findpara(void); static int do_gotoeop(int, int, int *); /* + * Returns non-zero iff c is "end of sentence punctuation". Used + * only for determining whether to double-space sentences. + */ +static int +iseosp(int c) +{ + return (c == '?' || c == '!' || c == '.'); +} + +/* * Move to start of paragraph. * Move backwards by line, checking from the 1st character forwards for the * existence a non-space. 
If a non-space character is found, move to the @@ -179,7 +189,7 @@ fillpara(int f, int n) c = lgetc(curwp->w_dotp, curwp->w_doto); /* and then delete it */ - if (ldelete((RSIZE) 1, KNONE) == FALSE && !eopflag) { + if (ldeletechar((RSIZE) 1, KNONE) == FALSE && !eopflag) { retval = FALSE; goto cleanup; } @@ -207,13 +217,16 @@ fillpara(int f, int n) * character was one of '.','?','!' doublespace here. * behave the same way if a ')' is preceded by a * [.?!] and followed by a doublespace. + * + * These rules are too complicated for multibyte + * characters, so they just get single-spaced. */ if (dblspace && (!eopflag && ((eolflag || curwp->w_doto == llength(curwp->w_dotp) || (c = lgetc(curwp->w_dotp, curwp->w_doto)) == ' ' - || c == '\t') && (ISEOSP(wbuf[wordlen - 1]) || + || c == '\t') && (iseosp(wbuf[wordlen - 1]) || (wbuf[wordlen - 1] == ')' && wordlen >= 2 && - ISEOSP(wbuf[wordlen - 2])))) && + iseosp(wbuf[wordlen - 2])))) && wordlen < MAXWORD - 1)) wbuf[wordlen++] = ' '; @@ -229,7 +242,7 @@ fillpara(int f, int n) if (curwp->w_doto > 0 && lgetc(curwp->w_dotp, curwp->w_doto - 1) == ' ') { curwp->w_doto -= 1; - (void)ldelete((RSIZE) 1, KNONE); + (void)ldeletechar((RSIZE) 1, KNONE); } /* start a new line */ (void)lnewline(); @@ -421,7 +434,7 @@ fillword(int f, int n) #endif ) col |= 0x07; - else if (ISCTRL(c) != FALSE) + else if (iscntrl(c) != FALSE) ++col; } if (curwp->w_doto != llength(curwp->w_dotp)) { Index: re_search.c =================================================================== RCS file: /cvs/src/usr.bin/mg/re_search.c,v retrieving revision 1.33 diff -u -p -u -p -r1.33 re_search.c --- re_search.c 6 Aug 2017 04:39:45 -0000 1.33 +++ re_search.c 30 May 2018 13:34:01 -0000 @@ -546,7 +546,7 @@ killmatches(int cond) curwp->w_doto = 0; curwp->w_dotp = clp; count++; - s = ldelete(llength(clp) + 1, KNONE); + s = ldeletebyte(llength(clp) + 1, KNONE); clp = curwp->w_dotp; curwp->w_rflag |= WFMOVE; if (s == FALSE) Index: region.c 
=================================================================== RCS file: /cvs/src/usr.bin/mg/region.c,v retrieving revision 1.37 diff -u -p -u -p -r1.37 region.c --- region.c 9 Sep 2016 06:05:51 -0000 1.37 +++ region.c 30 May 2018 13:34:01 -0000 @@ -15,12 +15,15 @@ #include <sys/wait.h> #include <errno.h> #include <fcntl.h> +#include <limits.h> #include <poll.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <wchar.h> +#include <wctype.h> #include "def.h" @@ -55,7 +58,7 @@ killregion(int f, int n) curwp->w_dotp = region.r_linep; curwp->w_doto = region.r_offset; curwp->w_dotline = region.r_lineno; - s = ldelete(region.r_size, KFORW | KREG); + s = ldeletebyte(region.r_size, KFORW | KREG); clearmark(FFARG, 0); return (s); @@ -118,7 +121,13 @@ lowerregion(int f, int n) { struct line *linep; struct region region; - int loffs, c, s; + int loffs, s; + char t[MB_LEN_MAX + 1]; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + wchar_t wcl = 0; + size_t consumed = 0; + size_t mbslen = 0; if ((s = checkdirty(curbp)) != TRUE) return (s); @@ -141,10 +150,26 @@ lowerregion(int f, int n) linep = lforw(linep); loffs = 0; } else { - c = lgetc(linep, loffs); - if (ISUPPER(c) != FALSE) - lputc(linep, loffs, TOLOWER(c)); - ++loffs; + consumed = mbrtowc(&wc, &linep->l_text[loffs], + llength(linep) - loffs, &mbs); + if (consumed < (size_t) -2) { + wcl = towlower(wc); + mbslen = wcrtomb(t, wcl, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(linep, loffs, t[k]); + loffs++; + } + if (consumed > 0) { + region.r_size -= (consumed - 1); + } + } else { + loffs++; + } + } else { + mbs = (mbstate_t) { 0 }; + loffs++; + } } } return (TRUE); @@ -160,9 +185,15 @@ lowerregion(int f, int n) int upperregion(int f, int n) { - struct line *linep; - struct region region; - int loffs, c, s; + struct line *linep; + struct region region; + int loffs, s; + char t[MB_LEN_MAX + 1]; + mbstate_t mbs = { 0 }; + 
wchar_t wc = 0; + wchar_t wcu = 0; + size_t consumed = 0; + size_t mbslen = 0; if ((s = checkdirty(curbp)) != TRUE) return (s); @@ -171,6 +202,7 @@ upperregion(int f, int n) ewprintf("Buffer is read-only"); return (FALSE); } + if ((s = getregion(&region)) != TRUE) return (s); @@ -184,10 +216,26 @@ upperregion(int f, int n) linep = lforw(linep); loffs = 0; } else { - c = lgetc(linep, loffs); - if (ISLOWER(c) != FALSE) - lputc(linep, loffs, TOUPPER(c)); - ++loffs; + consumed = mbrtowc(&wc, &linep->l_text[loffs], + llength(linep) - loffs, &mbs); + if (consumed < (size_t) -2) { + wcu = towupper(wc); + mbslen = wcrtomb(t, wcu, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(linep, loffs, t[k]); + loffs++; + } + if (consumed > 0) { + region.r_size -= (consumed - 1); + } + } else { + loffs++; + } + } else { + mbs = (mbstate_t) { 0 }; + loffs++; + } } } return (TRUE); Index: search.c =================================================================== RCS file: /cvs/src/usr.bin/mg/search.c,v retrieving revision 1.46 diff -u -p -u -p -r1.46 search.c --- search.c 6 Aug 2017 04:39:45 -0000 1.46 +++ search.c 30 May 2018 13:34:01 -0000 @@ -322,7 +322,7 @@ isearch(int dir) /* if the search is case insensitive, add to pattern using lowercase */ xcase = 0; for (i = 0; pat[i]; i++) - if (ISUPPER(CHARMASK(pat[i]))) + if (isupper((unsigned char)pat[i])) xcase = 1; while (cbo < llength(clp)) { @@ -335,8 +335,8 @@ isearch(int dir) break; } firstc = 0; - if (!xcase && ISUPPER(c)) - c = TOLOWER(c); + if (!xcase && isupper(c)) + c = tolower(c); pat[pptr++] = c; pat[pptr] = '\0'; @@ -364,7 +364,7 @@ isearch(int dir) c = CCHR('J'); goto addchar; default: - if (ISCTRL(c)) { + if (iscntrl(c)) { ungetkey(c); curwp->w_markp = clp; curwp->w_marko = cbo; @@ -694,7 +694,7 @@ forwsrch(void) cbo = curwp->w_doto; nline = curwp->w_dotline; for (i = 0; pat[i]; i++) - if (ISUPPER(CHARMASK(pat[i]))) + if (isupper((unsigned char)pat[i])) xcase = 1; for (;;) { if (cbo ==
llength(clp)) { @@ -755,7 +755,7 @@ backsrch(void) cbo = curwp->w_doto; nline = curwp->w_dotline; for (i = 0; pat[i]; i++) - if (ISUPPER(CHARMASK(pat[i]))) + if (isupper((unsigned char)pat[i])) xcase = 1; for (;;) { if (cbo == 0) { @@ -809,16 +809,16 @@ fail: ; static int eq(int bc, int pc, int xcase) { - bc = CHARMASK(bc); - pc = CHARMASK(pc); + bc = (unsigned char)bc; + pc = (unsigned char)pc; if (bc == pc) return (TRUE); if (xcase) return (FALSE); - if (ISUPPER(bc)) - return (TOLOWER(bc) == pc); - if (ISUPPER(pc)) - return (bc == TOLOWER(pc)); + if (isupper(bc)) + return (tolower(bc) == pc); + if (isupper(pc)) + return (bc == tolower(pc)); return (FALSE); } Index: tags.c =================================================================== RCS file: /cvs/src/usr.bin/mg/tags.c,v retrieving revision 1.16 diff -u -p -u -p -r1.16 tags.c --- tags.c 6 Aug 2017 04:39:45 -0000 1.16 +++ tags.c 30 May 2018 13:34:01 -0000 @@ -49,7 +49,7 @@ struct ctag { char *pat; }; RB_HEAD(tagtree, ctag) tags = RB_INITIALIZER(&tags); -RB_GENERATE(tagtree, ctag, entry, ctagcmp); +RB_GENERATE(tagtree, ctag, entry, ctagcmp) struct tagpos { SLIST_ENTRY(tagpos) entry; @@ -433,10 +433,15 @@ searchpat(char *s_pat) int atbow(void) { + size_t len = llength(curwp->w_dotp); + struct line *clp = curwp->w_dotp; + size_t o = curwp->w_doto; + if (curwp->w_doto == 0) return (TRUE); - if (ISWORD(curwp->w_dotp->l_text[curwp->w_doto]) && - !ISWORD(curwp->w_dotp->l_text[curwp->w_doto - 1])) + + if (byteinword(clp->l_text, o, len - o) && + !byteinword(clp->l_text, o - 1, 1)) return (TRUE); return (FALSE); } @@ -449,15 +454,13 @@ curtoken(int f, int n, char *token) { struct line *odotp; int odoto, tdoto, odotline, size, r; - char c; /* Underscore character is to be treated as "inword" while * processing tokens unlike mg's default word traversal. Save * and restore it's cinfo value so that tag matching works for * identifier with underscore. 
*/ - c = cinfo['_']; - cinfo['_'] = _MG_W; + treatunderscoreasword(1); odotp = curwp->w_dotp; odoto = curwp->w_doto; @@ -491,10 +494,10 @@ curtoken(int f, int n, char *token) r = TRUE; cleanup: - cinfo['_'] = c; curwp->w_dotp = odotp; curwp->w_doto = odoto; curwp->w_dotline = odotline; + treatunderscoreasword(0); return (r); } Index: ttykbd.c =================================================================== RCS file: /cvs/src/usr.bin/mg/ttykbd.c,v retrieving revision 1.19 diff -u -p -u -p -r1.19 ttykbd.c --- ttykbd.c 17 Dec 2017 14:37:57 -0000 1.19 +++ ttykbd.c 30 May 2018 13:34:01 -0000 @@ -18,12 +18,6 @@ #include "kbd.h" /* - * Get keyboard character. Very simple if you use keymaps and keys files. - */ - -char *keystrings[] = {NULL}; - -/* * Turn on function keys using keypad_xmit, then load a keys file, if * available. The keys file is located in the same manner as the startup * file is, depending on what startupfile() does on your system. Index: undo.c =================================================================== RCS file: /cvs/src/usr.bin/mg/undo.c,v retrieving revision 1.58 diff -u -p -u -p -r1.58 undo.c --- undo.c 5 Sep 2016 08:10:58 -0000 1.58 +++ undo.c 30 May 2018 13:34:01 -0000 @@ -562,7 +562,7 @@ undo(int f, int n) */ switch (ptr->type) { case INSERT: - ldelete(ptr->region.r_size, KNONE); + ldeletebyte(ptr->region.r_size, KNONE); break; case DELETE: lp = curwp->w_dotp; Index: util.c =================================================================== RCS file: /cvs/src/usr.bin/mg/util.c,v retrieving revision 1.38 diff -u -p -u -p -r1.38 util.c --- util.c 18 Nov 2015 18:21:06 -0000 1.38 +++ util.c 30 May 2018 13:34:01 -0000 @@ -11,8 +11,12 @@ #include <sys/queue.h> #include <ctype.h> +#include <limits.h> #include <signal.h> #include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <wchar.h> #include "def.h" @@ -33,6 +37,9 @@ showcpos(int f, int n) int nline, row; int cline, cbyte; /* Current line/char/byte */ int ratio; + char ismb = 
0; + wchar_t wc = 0; + char mbc[MB_LEN_MAX + 1]; /* collect the data */ clp = bfirstlp(curbp); @@ -69,8 +76,43 @@ showcpos(int f, int n) clp = lforw(clp); } ratio = nchar ? (100L * cchar) / nchar : 100; - ewprintf("Char: %c (0%o) point=%ld(%d%%) line=%d row=%d col=%d", - cbyte, cbyte, cchar, ratio, cline, row, getcolpos(curwp)); + + if (!(curbp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + size_t consumed = 0; + size_t offset = 0; + while (cbyte != '\n' && offset <= curwp->w_doto) { + int c = lgetc(clp, curwp->w_doto - offset); + if (isprint(c) || (iscntrl(c) != FALSE)) { + break; + } + consumed = mbrtowc(&wc, + &clp->l_text[curwp->w_doto - offset], + llength(clp) - curwp->w_doto + offset, + &mbs); + if (consumed < (size_t) -2) { + ismb = (offset < consumed); + snprintf(mbc, consumed + 1, "%s", + &clp->l_text[curwp->w_doto - offset]); + mbc[consumed + 1] = '\0'; + break; + } else { + memset(&mbs, 0, sizeof mbs); + } + offset++; + } + } + + if (ismb) { + ewprintf("Char: %s (codepoint 0x%lx) Byte: %c (0%o) " + "point=%ld(%d%%) line=%d row=%d col=%d", mbc, + (long) wc, cbyte, cbyte, cchar, ratio, cline, row, + getcolpos(curwp)); + } else { + ewprintf("Char: %c (0%o) point=%ld(%d%%) line=%d row=%d" + "col=%d", cbyte, cbyte, cchar, ratio, cline, row, + getcolpos(curwp)); + } return (TRUE); } @@ -92,10 +134,26 @@ getcolpos(struct mgwin *wp) ) { col |= 0x07; col++; - } else if (ISCTRL(c) != FALSE) + } else if (iscntrl(c) != FALSE) col += 2; else if (isprint(c)) { col++; + } else if (!(wp->w_bufp->b_flag & BFSHOWRAW)) { + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + size_t consumed = mbrtowc(&wc, &wp->w_dotp->l_text[i], + llength(wp->w_dotp) - i, + &mbs); + int width = -1; + if (consumed < (size_t) -2) { + width = wcwidth(wc); + } + if (width >= 0) { + col += width; + i += (consumed - 1); + } else { + col += snprintf(tmp, sizeof(tmp), "\\%o", c); + } } else { col += snprintf(tmp, sizeof(tmp), "\\%o", c); } @@ -234,7 +292,7 @@ deblank(int f, int n) return (TRUE); 
curwp->w_dotp = lforw(lp1); curwp->w_doto = 0; - return (ldelete((RSIZE)nld, KNONE)); + return (ldeletechar((RSIZE)nld, KNONE)); } /* @@ -275,7 +333,7 @@ delwhite(int f, int n) if (s == TRUE) (void)forwchar(FFRAND, 1); - (void)ldelete((RSIZE)(col - curwp->w_doto), KNONE); + (void)ldeletebyte((RSIZE)(col - curwp->w_doto), KNONE); return (TRUE); } @@ -427,7 +485,7 @@ forwdel(int f, int n) thisflag |= CFKILL; } - return (ldelete((RSIZE) n, (f & FFARG) ? KFORW : KNONE)); + return (ldeletechar((RSIZE) n, (f & FFARG) ? KFORW : KNONE)); } /* @@ -451,7 +509,7 @@ backdel(int f, int n) thisflag |= CFKILL; } if ((s = backchar(f | FFRAND, n)) == TRUE) - s = ldelete((RSIZE)n, (f & FFARG) ? KFORW : KNONE); + s = ldeletechar((RSIZE)n, (f & FFARG) ? KFORW : KNONE); return (s); } Index: word.c =================================================================== RCS file: /cvs/src/usr.bin/mg/word.c,v retrieving revision 1.19 diff -u -p -u -p -r1.19 word.c --- word.c 30 Dec 2015 20:51:51 -0000 1.19 +++ word.c 30 May 2018 13:34:01 -0000 @@ -11,9 +11,12 @@ #include <sys/queue.h> #include <signal.h> #include <errno.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> +#include <wctype.h> #include "def.h" @@ -231,8 +234,16 @@ grabword(char **word) int upperword(int f, int n) { - int c, s; - RSIZE size; + int s; + RSIZE size; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + wchar_t wcu = 0; + size_t consumed = 0; + size_t mbslen = 0; + struct line *clp; + size_t o; + char t[MB_LEN_MAX + 1]; if ((s = checkdirty(curbp)) != TRUE) return (s); @@ -253,12 +264,28 @@ upperword(int f, int n) undo_add_change(curwp->w_dotp, curwp->w_doto, size); while (inword() != FALSE) { - c = lgetc(curwp->w_dotp, curwp->w_doto); - if (ISLOWER(c) != FALSE) { - c = TOUPPER(c); - lputc(curwp->w_dotp, curwp->w_doto, c); - lchange(WFFULL); + clp = curwp->w_dotp; + o = curwp->w_doto; + consumed = mbrtowc(&wc, &clp->l_text[o], + llength(clp) - o, &mbs); + if (consumed < 
(size_t) -2) { + wcu = towupper(wc); + mbslen = wcrtomb(t, wcu, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(clp, o, t[k]); + o++; + } + } else { + o++; + } + } else { + mbs = (mbstate_t) { 0 }; + o++; } + + lchange(WFFULL); + if (forwchar(FFRAND, 1) == FALSE) return (TRUE); } @@ -274,8 +301,16 @@ upperword(int f, int n) int lowerword(int f, int n) { - int c, s; - RSIZE size; + int s; + RSIZE size; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + wchar_t wcu = 0; + size_t consumed = 0; + size_t mbslen = 0; + struct line *clp; + size_t o; + char t[MB_LEN_MAX + 1]; if ((s = checkdirty(curbp)) != TRUE) return (s); @@ -284,6 +319,7 @@ lowerword(int f, int n) ewprintf("Buffer is read-only"); return (FALSE); } + if (n < 0) return (FALSE); while (n--) { @@ -295,12 +331,28 @@ lowerword(int f, int n) undo_add_change(curwp->w_dotp, curwp->w_doto, size); while (inword() != FALSE) { - c = lgetc(curwp->w_dotp, curwp->w_doto); - if (ISUPPER(c) != FALSE) { - c = TOLOWER(c); - lputc(curwp->w_dotp, curwp->w_doto, c); - lchange(WFFULL); + clp = curwp->w_dotp; + o = curwp->w_doto; + consumed = mbrtowc(&wc, &clp->l_text[o], + llength(clp) - o, &mbs); + if (consumed < (size_t) -2) { + wcu = towlower(wc); + mbslen = wcrtomb(t, wcu, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(clp, o, t[k]); + o++; + } + } else { + o++; + } + } else { + mbs = (mbstate_t) { 0 }; + o++; } + + lchange(WFFULL); + if (forwchar(FFRAND, 1) == FALSE) return (TRUE); } @@ -318,8 +370,17 @@ lowerword(int f, int n) int capword(int f, int n) { - int c, s; - RSIZE size; + int s; + RSIZE size; + mbstate_t mbs = { 0 }; + wchar_t wc = 0; + wchar_t wcu = 0; + wchar_t wcl = 0; + size_t consumed = 0; + size_t mbslen = 0; + struct line *clp; + size_t o; + char t[MB_LEN_MAX + 1]; if ((s = checkdirty(curbp)) != TRUE) return (s); @@ -340,21 +401,46 @@ capword(int f, int n) undo_add_change(curwp->w_dotp, curwp->w_doto, size); if (inword() != FALSE) { - c 
= lgetc(curwp->w_dotp, curwp->w_doto); - if (ISLOWER(c) != FALSE) { - c = TOUPPER(c); - lputc(curwp->w_dotp, curwp->w_doto, c); - lchange(WFFULL); + clp = curwp->w_dotp; + o = curwp->w_doto; + consumed = mbrtowc(&wc, &clp->l_text[o], + llength(clp) - o, &mbs); + if (consumed < (size_t) -2) { + wcu = towupper(wc); + mbslen = wcrtomb(t, wcu, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(clp, o, t[k]); + o++; + } + lchange(WFFULL); + } + } else { + mbs = (mbstate_t) { 0 }; } + if (forwchar(FFRAND, 1) == FALSE) return (TRUE); + while (inword() != FALSE) { - c = lgetc(curwp->w_dotp, curwp->w_doto); - if (ISUPPER(c) != FALSE) { - c = TOLOWER(c); - lputc(curwp->w_dotp, curwp->w_doto, c); - lchange(WFFULL); + clp = curwp->w_dotp; + o = curwp->w_doto; + consumed = mbrtowc(&wc, &clp->l_text[o], + llength(clp) - o, &mbs); + if (consumed < (size_t) -2) { + wcl = towlower(wc); + mbslen = wcrtomb(t, wcl, &mbs); + if (mbslen == consumed) { + for (size_t k = 0; k < consumed; ++k) { + lputc(clp, o, t[k]); + o++; + } + lchange(WFFULL); + } + } else { + mbs = (mbstate_t) { 0 }; } + if (forwchar(FFRAND, 1) == FALSE) return (TRUE); } @@ -364,7 +450,7 @@ capword(int f, int n) } /* - * Count characters in word, from current position + * Count bytes in word, from current position */ RSIZE countfword() @@ -372,16 +458,24 @@ countfword() RSIZE size; struct line *dotp; int doto; + struct line *last_dotp; + int last_doto; dotp = curwp->w_dotp; doto = curwp->w_doto; size = 0; while (inword() != FALSE) { + last_dotp = curwp->w_dotp; + last_doto = curwp->w_doto; if (forwchar(FFRAND, 1) == FALSE) /* hit the end of the buffer */ goto out; - ++size; + if (last_dotp == curwp->w_dotp) { + size += (curwp->w_doto - last_doto); + } else { + size++; + } } out: curwp->w_dotp = dotp; @@ -438,7 +532,7 @@ delfword(int f, int n) out: curwp->w_dotp = dotp; curwp->w_doto = doto; - return (ldelete(size, KFORW)); + return (ldeletechar(size, KFORW)); } /* @@ -498,7 +592,7 @@ 
delbword(int f, int n) /* undo assumed delete */ --size; out: - return (ldelete(size, KBACK)); + return (ldeletechar(size, KBACK)); } /* @@ -509,6 +603,7 @@ int inword(void) { /* can't use lgetc in ISWORD due to bug in OSK cpp */ - return (curwp->w_doto != llength(curwp->w_dotp) && - ISWORD(curwp->w_dotp->l_text[curwp->w_doto])); + size_t l = llength(curwp->w_dotp); + size_t k = curwp->w_doto; + return (k != l) && byteinword(curwp->w_dotp->l_text, k, l - k); } Index: yank.c =================================================================== RCS file: /cvs/src/usr.bin/mg/yank.c,v retrieving revision 1.14 diff -u -p -u -p -r1.14 yank.c --- yank.c 11 Dec 2015 20:21:23 -0000 1.14 +++ yank.c 30 May 2018 13:34:01 -0000 @@ -105,7 +105,7 @@ kremove(int n) { if (n < 0 || n + kstart >= kused) return (-1); - return (CHARMASK(kbufp[n + kstart])); + return ((unsigned char)(kbufp[n + kstart])); } /* @@ -206,7 +206,7 @@ killline(int f, int n) */ done: if (chunk) - return (ldelete(chunk, KFORW)); + return (ldeletebyte(chunk, KFORW)); return (TRUE); }