Here is an updated diff. It only fixes cursor movement; it is still
experimental, and some obvious problems are not fixed yet. The diff
below is taken from the latest commit on GitHub:
Index: basic.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/basic.c,v
retrieving revision 1.47
diff -u -p -u -p -r1.47 basic.c
--- basic.c     10 Oct 2015 09:13:14 -0000      1.47
+++ basic.c     30 May 2018 13:34:00 -0000
@@ -18,6 +18,7 @@
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 
@@ -64,6 +65,33 @@ backchar(int f, int n)
                        curwp->w_dotline--;
                } else
                        curwp->w_doto--;
+
+               /*
+                * If we're on a multibyte character, and not in
+                * raw mode, we should move all the way through it.
+                */
+               if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t offset = 0;
+                       size_t consumed = 0;
+                       struct line *clp = curwp->w_dotp;
+                       while (offset <= curwp->w_doto) {
+                               size_t s = curwp->w_doto - offset;
+                               size_t len = llength(clp) - s;
+                               consumed = mbrtowc(&wc, &clp->l_text[s],
+                                                  len, &mbs);
+                               if (consumed < (size_t) -2) {
+                                       curwp->w_doto -= offset;
+                                       break;
+                               }
+                               mbs = (mbstate_t) { 0 };
+                               offset++;
+                               if (offset > MB_LEN_MAX) {
+                                       break;
+                               }
+                       }
+               }
        }
        return (TRUE);
 }
@@ -108,8 +136,27 @@ forwchar(int f, int n)
                        curwp->w_doto = 0;
                        curwp->w_dotline++;
                        curwp->w_rflag |= WFMOVE;
-               } else
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       /*
+                        * If we're on a multibyte character, and
+                        * not in raw mode, we should move all the
+                        * way through it.
+                        */
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t s = curwp->w_doto;
+                       struct line *clp = curwp->w_dotp;
+                       size_t len = llength(clp) - s;
+                       size_t consumed = mbrtowc(&wc, &clp->l_text[s],
+                                                 len, &mbs);
+                       if (consumed && consumed < (size_t) -2) {
+                               curwp->w_doto += consumed;
+                       } else {
+                               curwp->w_doto++;
+                       }
+               } else {
                        curwp->w_doto++;
+               }
        }
        return (TRUE);
 }
@@ -269,12 +316,25 @@ setgoal(void)
 int
 getgoal(struct line *dlp)
 {
-       int c, i, col = 0;
-       char tmp[5];
+       return getbyteofcol(dlp, 0, curgoal);
+}
 
+/*
+ * Scan lp from startbyte, counting columns from 0; return the offset of
+ * the first character ending past targetcol, or where the line ran out.
+ */
+size_t
+getbyteofcol(const struct line *lp, const size_t startbyte,
+             const size_t targetcol)
+{
+       int c;
+       size_t i, col = 0;
+       char tmp[5];
+       size_t advance_by = 1;  /* bytes used by the current character */
 
-       for (i = 0; i < llength(dlp); i++) {
-               c = lgetc(dlp, i);
+       for (i = startbyte; i < llength(lp); i += advance_by) {
+               advance_by = 1;
+               c = lgetc(lp, i);
                if (c == '\t'
 #ifdef NOTAB
                    && !(curbp->b_flag & BFNOTAB)
@@ -282,18 +342,86 @@ getgoal(struct line *dlp)
                        ) {
                        col |= 0x07;
                        col++;
-               } else if (ISCTRL(c) != FALSE) {
+               } else if (iscntrl(c)) {
                        col += 2;
-               } else if (isprint(c))
+               } else if (isprint(c)) {
                        col++;
-               else {
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {   /* decoded OK */
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {       /* wcwidth() gave a width */
+                               col += width;
+                               advance_by = consumed;  /* skip all of it */
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
+               } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }
-               if (col > curgoal)
+               if (col > targetcol)
                        break;
        }
        return (i);
 }
+
+/*
+ * Return the display column of byte targetoffset in lp, scanning from
+ * startbyte, which is taken to sit at column initial_col (as if lp
+ * were part of a longer string being printed by vtputs).
+ */
+size_t
+getcolofbyte(const struct line *lp, const size_t startbyte,
+             const size_t initial_col, const size_t targetoffset)
+{
+       int c;
+       size_t i, col = initial_col;
+       char tmp[5];
+       size_t advance_by = 1;  /* bytes used by the current character */
+
+       for (i = startbyte; i < llength(lp); i += advance_by) {
+               if (i >= targetoffset)  /* reached the target byte */
+                       break;
+               advance_by = 1;
+               c = lgetc(lp, i);
+               if (c == '\t'
+#ifdef NOTAB
+                   && !(curbp->b_flag & BFNOTAB)
+#endif
+                       ) {
+                       col |= 0x07;    /* round up to the next */
+                       col++;          /* multiple of eight */
+               } else if (iscntrl(c)) {
+                       col += 2;       /* presumably shown as ^X */
+               } else if (isprint(c)) {
+                       col++;
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {   /* decoded OK */
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {       /* wcwidth() gave a width */
+                               col += width;
+                               advance_by = consumed;  /* skip all of it */
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
+               } else {        /* BFSHOWRAW set: width of octal escape */
+                       col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+               }
+       }
+       return (col);
+}
+
 
 /*
  * Scroll forward by a specified number
Index: chrdef.h
===================================================================
RCS file: chrdef.h
diff -N chrdef.h
--- chrdef.h    25 Mar 2015 12:29:03 -0000      1.10
+++ /dev/null   1 Jan 1970 00:00:00 -0000
@@ -1,74 +0,0 @@
-/*     $OpenBSD: chrdef.h,v 1.10 2015/03/25 12:29:03 bcallah Exp $     */
-
-/* This file is in the public domain. */
-
-/*
- * sys/default/chardef.h: character set specific #defines for Mg 2a
- * Warning: System specific ones exist
- */
-
-/*
- * Casting should be at least as efficient as anding with 0xff,
- * and won't have the size problems.
- */
-#define        CHARMASK(c)     ((unsigned char) (c))
-
-/*
- * These flags, and the macros below them,
- * make up a do-it-yourself set of "ctype" macros that
- * understand the DEC multinational set, and let me ask
- * a slightly different set of questions.
- */
-#define _MG_W  0x01            /* Word.                         */
-#define _MG_U  0x02            /* Upper case letter.            */
-#define _MG_L  0x04            /* Lower case letter.            */
-#define _MG_C  0x08            /* Control.                      */
-#define _MG_P  0x10            /* end of sentence punctuation   */
-#define        _MG_D   0x20            /* is decimal digit              */
-
-#define ISWORD(c)      ((cinfo[CHARMASK(c)]&_MG_W)!=0)
-#define ISCTRL(c)      ((cinfo[CHARMASK(c)]&_MG_C)!=0)
-#define ISUPPER(c)     ((cinfo[CHARMASK(c)]&_MG_U)!=0)
-#define ISLOWER(c)     ((cinfo[CHARMASK(c)]&_MG_L)!=0)
-#define ISEOSP(c)      ((cinfo[CHARMASK(c)]&_MG_P)!=0)
-#define        ISDIGIT(c)      ((cinfo[CHARMASK(c)]&_MG_D)!=0)
-#define TOUPPER(c)     ((c)-0x20)
-#define TOLOWER(c)     ((c)+0x20)
-
-/*
- * Generally useful thing for chars
- */
-#define CCHR(x)                ((x) ^ 0x40)    /* CCHR('?') == DEL */
-
-#define        K00             256
-#define        K01             257
-#define        K02             258
-#define        K03             259
-#define        K04             260
-#define        K05             261
-#define        K06             262
-#define        K07             263
-#define        K08             264
-#define        K09             265
-#define        K0A             266
-#define        K0B             267
-#define        K0C             268
-#define        K0D             269
-#define        K0E             270
-#define        K0F             271
-#define        K10             272
-#define        K11             273
-#define        K12             274
-#define        K13             275
-#define        K14             276
-#define        K15             277
-#define        K16             278
-#define        K17             279
-#define        K18             280
-#define        K19             281
-#define        K1A             282
-#define        K1B             283
-#define        K1C             284
-#define        K1D             285
-#define        K1E             286
-#define        K1F             287
Index: cinfo.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/cinfo.c,v
retrieving revision 1.18
diff -u -p -u -p -r1.18 cinfo.c
--- cinfo.c     19 Mar 2015 21:22:15 -0000      1.18
+++ cinfo.c     30 May 2018 13:34:00 -0000
@@ -10,92 +10,25 @@
  * ctype.h) don't let you ask.
  */
 
+#include <ctype.h>
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdio.h>
 #include <string.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "def.h"
 
-/*
- * This table, indexed by a character drawn
- * from the 256 member character set, is used by my
- * own character type macros to answer questions about the
- * type of a character. It handles the full multinational
- * character set, and lets me ask some questions that the
- * standard "ctype" macros cannot ask.
- */
-/*
- * Due to incompatible behaviour between "standard" emacs and
- * ctags word traversing, '_' character's value is changed on 
- * the fly in ctags mode, hence non-const.
+/* Flag for treating '_' as word-like byte
  */
-char cinfo[256] = {
-       _MG_C, _MG_C, _MG_C, _MG_C,                                   /* 0x0X */
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       _MG_C, _MG_C, _MG_C, _MG_C,                                   /* 0x1X */
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       _MG_C, _MG_C, _MG_C, _MG_C,
-       0, _MG_P, 0, 0,                                               /* 0x2X */
-       _MG_W, _MG_W, 0, _MG_W,
-       0, 0, 0, 0,
-       0, 0, _MG_P, 0,
-       _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W,   /* 0x3X */
-       _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W, _MG_D | _MG_W,
-       _MG_D | _MG_W, _MG_D | _MG_W, 0, 0,
-       0, 0, 0, _MG_P,
-       0, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,               /* 0x4X */
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,   /* 0x5X */
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, 0,
-       0, 0, 0, 0,
-       0, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,               /* 0x6X */
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,   /* 0x7X */
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, 0,
-       0, 0, 0, _MG_C,
-       0, 0, 0, 0,                                                   /* 0x8X */
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,                                                   /* 0x9X */
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,                                                   /* 0xAX */
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,                                                   /* 0xBX */
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       0, 0, 0, 0,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,   /* 0xCX */
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       0, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,               /* 0xDX */
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W, _MG_U | _MG_W,
-       _MG_U | _MG_W, _MG_U | _MG_W, 0, _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,   /* 0xEX */
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       0, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,               /* 0xFX */
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W, _MG_L | _MG_W,
-       _MG_L | _MG_W, _MG_L | _MG_W, 0, 0
-};
+static int underscoreisword;   /* byteinword() returns this for '_' */
+
+void
+treatunderscoreasword(int i)
+{
+       underscoreisword = i;   /* non-zero: '_' counts as in-word */
+}
 
 /*
  * Find the name of a keystroke.  Needs to be changed to handle 8-bit printing
@@ -109,7 +42,7 @@ getkeyname(char *cp, size_t len, int k)
        size_t           copied;
 
        if (k < 0)
-               k = CHARMASK(k);        /* sign extended char */
+               k = (unsigned char)(k); /* sign extended char */
        switch (k) {
        case CCHR('@'):
                np = "C-SPC";
@@ -130,10 +63,11 @@ getkeyname(char *cp, size_t len, int k)
                np = "DEL";
                break;
        default:
-               if (k >= KFIRST && k <= KLAST &&
-                   (np = keystrings[k - KFIRST]) != NULL)
-                       break;
+               /*
+                * Codes above DEL, including the old K00 slot (256), have
+                * no symbolic name; they are printed in octal below.
+                */
                if (k > CCHR('?')) {
                        *cp++ = '0';
                        *cp++ = ((k >> 6) & 7) + '0';
                        *cp++ = ((k >> 3) & 7) + '0';
@@ -144,8 +77,8 @@ getkeyname(char *cp, size_t len, int k)
                        *cp++ = 'C';
                        *cp++ = '-';
                        k = CCHR(k);
-                       if (ISUPPER(k))
-                               k = TOLOWER(k);
+                       if (isupper(k))
+                               k = tolower(k);
                }
                *cp++ = k;
                *cp = '\0';
@@ -155,4 +88,32 @@ getkeyname(char *cp, size_t len, int k)
        if (copied >= len)
                copied = len - 1;
        return (cp + copied);
+}
+
+/*
+ * Return non-zero iff the byte at s[k] is, or starts a multibyte
+ * character that is, "inside a word"; at most len bytes starting at
+ * s[k] are examined.  '$', '%' and '\'' always count, '_' counts only
+ * while the flag set through treatunderscoreasword() is non-zero, and
+ * anything else counts when iswalnum() accepts the decoded character.
+ * Bytes that do not decode as a character are treated as in-word.
+ */
+int
+byteinword(const char *s, size_t k, size_t len) {
+       if (s[k] == '$' || s[k] == '%' || s[k] == '\'') {
+               return 1;
+       } else if (s[k] == '_') {
+               return underscoreisword;
+       }
+
+       mbstate_t mbs = { 0 };
+       wchar_t wc = 0;
+       size_t consumed = mbrtowc(&wc, &s[k], len, &mbs);
+
+       if (consumed < (size_t) -2) {   /* decoded OK */
+               return iswalnum(wc);
+       }
+
+       /* Invalid or truncated sequence: count the byte as in-word */
+       return 1;
 }
Index: cmode.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/cmode.c,v
retrieving revision 1.16
diff -u -p -u -p -r1.16 cmode.c
--- cmode.c     26 Sep 2015 21:51:58 -0000      1.16
+++ cmode.c     30 May 2018 13:34:00 -0000
@@ -14,6 +14,7 @@
 #include <ctype.h>
 #include <signal.h>
 #include <stdio.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "funmap.h"
@@ -419,10 +420,25 @@ findcolpos(const struct buffer *bp, cons
                        ) {
                        col |= 0x07;
                        col++;
-               } else if (ISCTRL(c) != FALSE)
+               } else if (iscntrl(c) != FALSE) {
                        col += 2;
-               else if (isprint(c)) {
+               } else if (isprint(c)) {
                        col++;
+               } else if (!(bp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               i += (consumed - 1);
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
                } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }
Index: def.h
===================================================================
RCS file: /cvs/src/usr.bin/mg/def.h,v
retrieving revision 1.155
diff -u -p -u -p -r1.155 def.h
--- def.h       14 Apr 2016 17:05:32 -0000      1.155
+++ def.h       30 May 2018 13:34:01 -0000
@@ -10,7 +10,10 @@
  * per-terminal definitions are in special header files.
  */
 
-#include       "chrdef.h"
+/*
+ * Generally useful thing for chars
+ */
+#define CCHR(x)                ((x) ^ 0x40)    /* CCHR('?') == DEL */
 
 typedef int    (*PF)(int, int);        /* generally useful type */
 
@@ -148,7 +151,7 @@ struct line {
  */
 #define lforw(lp)      ((lp)->l_fp)
 #define lback(lp)      ((lp)->l_bp)
-#define lgetc(lp, n)   (CHARMASK((lp)->l_text[(n)]))
+#define lgetc(lp, n)   ((unsigned char)((lp)->l_text[(n)]))
 #define lputc(lp, n, c) ((lp)->l_text[(n)]=(c))
 #define llength(lp)    ((lp)->l_used)
 #define ltext(lp)      ((lp)->l_text)
@@ -263,7 +266,7 @@ struct buffer {
        int              b_marko;       /* ditto for the "mark"          */
        short            b_nmodes;      /* number of non-fundamental modes */
        char             b_nwnd;        /* Count of windows on buffer    */
-       char             b_flag;        /* Flags                         */
+       short            b_flag;        /* Flags                         */
        char             b_fname[NFILEN]; /* File name                   */
        char             b_cwd[NFILEN]; /* working directory             */
        struct fileinfo  b_fi;          /* File attributes               */
@@ -290,6 +293,7 @@ struct buffer {
 #define BFDIRTY     0x20               /* Buffer was modified elsewhere */
 #define BFIGNDIRTY  0x40               /* Ignore modifications          */
 #define BFDIREDDEL  0x80               /* Dired has a deleted 'D' file  */
+#define BFSHOWRAW   0x100              /* Show unprintable as octal     */
 /*
  * This structure holds information about recent actions for the Undo command.
  */
@@ -314,9 +318,6 @@ struct undo_rec {
 
 #define putpad(str, num)       tputs(str, num, ttputc)
 
-#define KFIRST K00
-#define KLAST  K00
-
 /*
  * Prototypes.
  */
@@ -387,7 +388,8 @@ void                 lchange(int);
 int             linsert(int, int);
 int             lnewline_at(struct line *, int);
 int             lnewline(void);
-int             ldelete(RSIZE, int);
+int             ldeletebyte(RSIZE, int);
+int             ldeletechar(RSIZE, int);
 int             ldelnewline(void);
 int             lreplace(RSIZE, char *);
 char *          linetostr(const struct line *);
@@ -490,6 +492,7 @@ int          digit_argument(int, int);
 int             negative_argument(int, int);
 int             selfinsert(int, int);
 int             quote(int, int);
+int             insert_char(int, int);
 
 /* main.c */
 int             ctrlg(int, int);
@@ -499,7 +502,9 @@ int          quit(int, int);
 void            panic(char *);
 
 /* cinfo.c */
+void            treatunderscoreasword(int);
 char           *getkeyname(char  *, size_t, int);
+int             byteinword(const char *, size_t, size_t);
 
 /* basic.c */
 int             gotobol(int, int);
@@ -512,6 +517,8 @@ int          forwline(int, int);
 int             backline(int, int);
 void            setgoal(void);
 int             getgoal(struct line *);
+size_t          getbyteofcol(const struct line *, size_t, size_t);
+size_t          getcolofbyte(const struct line *, size_t, size_t, size_t);
 int             forwpage(int, int);
 int             backpage(int, int);
 int             forw1page(int, int);
@@ -658,6 +665,7 @@ int          notabmode(int, int);
 #endif /* NOTAB */
 int             overwrite_mode(int, int);
 int             set_default_mode(int,int);
+int             show_raw_mode(int, int);
 
 #ifdef REGEX
 /* re_search.c X */
@@ -737,7 +745,5 @@ extern int           doaudiblebell;
 extern int              dovisiblebell;
 extern int              dblspace;
-extern char             cinfo[];
-extern char            *keystrings[];
 extern char             pat[NPAT];
 extern char             prompt[];
 
Index: display.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/display.c,v
retrieving revision 1.48
diff -u -p -u -p -r1.48 display.c
--- display.c   6 Jul 2017 19:27:37 -0000       1.48
+++ display.c   30 May 2018 13:34:01 -0000
@@ -7,17 +7,19 @@
  * redisplay system knows almost nothing about the editing
  * process; the editing functions do, however, set some
  * hints to eliminate a lot of the grinding. There is more
- * that can be done; the "vtputc" interface is a real
+ * that can be done; the "vtputs" interface is a real
  * pig.
  */
 
 #include <sys/queue.h>
 #include <ctype.h>
+#include <limits.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <term.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "kbd.h"
@@ -52,11 +54,10 @@ struct score {
 };
 
 void   vtmove(int, int);
-void   vtputc(int);
 void   vtpute(int);
-int    vtputs(const char *);
+int    vtputs(const char *, size_t, size_t);
 void   vteeol(void);
-void   updext(int, int);
+int    updext(int, int);
 void   modeline(struct mgwin *, int);
 void   setscores(int, int);
 void   traceback(int, int, int, int);
@@ -216,8 +217,8 @@ vtresize(int force, int newrow, int newc
        }
        if (rowchanged || colchanged || first_run) {
                for (i = 0; i < 2 * (newrow - 1); i++)
-                       TRYREALLOC(video[i].v_text, newcol);
-               TRYREALLOC(blanks.v_text, newcol);
+                       TRYREALLOC(video[i].v_text, newcol * MB_LEN_MAX);
+               TRYREALLOC(blanks.v_text, newcol * MB_LEN_MAX);
        }
 
        nrow = newrow;
@@ -260,7 +261,7 @@ vtinit(void)
         */
 
        blanks.v_color = CTEXT;
-       for (i = 0; i < ncol; ++i)
+       for (i = 0; i < ncol * MB_LEN_MAX; ++i)
                blanks.v_text[i] = ' ';
 }
 
@@ -287,7 +288,7 @@ vttidy(void)
  * Move the virtual cursor to an origin
  * 0 spot on the virtual display screen. I could
  * store the column as a character pointer to the spot
- * on the line, which would make "vtputc" a little bit
+ * on the line, which would make "vtputs" a little bit
  * more efficient. No checking for errors.
  */
 void
@@ -298,88 +299,6 @@ vtmove(int row, int col)
 }
 
 /*
- * Write a character to the virtual display,
- * dealing with long lines and the display of unprintable
- * things like control characters. Also expand tabs every 8
- * columns. This code only puts printing characters into
- * the virtual display image. Special care must be taken when
- * expanding tabs. On a screen whose width is not a multiple
- * of 8, it is possible for the virtual cursor to hit the
- * right margin before the next tab stop is reached. This
- * makes the tab code loop if you are not careful.
- * Three guesses how we found this.
- */
-void
-vtputc(int c)
-{
-       struct video    *vp;
-
-       c &= 0xff;
-
-       vp = vscreen[vtrow];
-       if (vtcol >= ncol)
-               vp->v_text[ncol - 1] = '$';
-       else if (c == '\t'
-#ifdef NOTAB
-           && !(curbp->b_flag & BFNOTAB)
-#endif
-           ) {
-               do {
-                       vtputc(' ');
-               } while (vtcol < ncol && (vtcol & 0x07) != 0);
-       } else if (ISCTRL(c)) {
-               vtputc('^');
-               vtputc(CCHR(c));
-       } else if (isprint(c))
-               vp->v_text[vtcol++] = c;
-       else {
-               char bf[5];
-
-               snprintf(bf, sizeof(bf), "\\%o", c);
-               vtputs(bf);
-       }
-}
-
-/*
- * Put a character to the virtual screen in an extended line.  If we are not
- * yet on left edge, don't print it yet.  Check for overflow on the right
- * margin.
- */
-void
-vtpute(int c)
-{
-       struct video *vp;
-
-       c &= 0xff;
-
-       vp = vscreen[vtrow];
-       if (vtcol >= ncol)
-               vp->v_text[ncol - 1] = '$';
-       else if (c == '\t'
-#ifdef NOTAB
-           && !(curbp->b_flag & BFNOTAB)
-#endif
-           ) {
-               do {
-                       vtpute(' ');
-               } while (((vtcol + lbound) & 0x07) != 0 && vtcol < ncol);
-       } else if (ISCTRL(c) != FALSE) {
-               vtpute('^');
-               vtpute(CCHR(c));
-       } else if (isprint(c)) {
-               if (vtcol >= 0)
-                       vp->v_text[vtcol] = c;
-               ++vtcol;
-       } else {
-               char bf[5], *cp;
-
-               snprintf(bf, sizeof(bf), "\\%o", c);
-               for (cp = bf; *cp != '\0'; cp++)
-                       vtpute(*cp);
-       }
-}
-
-/*
  * Erase from the end of the software cursor to the end of the line on which
  * the software cursor is located. The display routines will decide if a
  * hardware erase to end of line command should be used to display this.
@@ -390,7 +309,7 @@ vteeol(void)
        struct video *vp;
 
        vp = vscreen[vtrow];
-       while (vtcol < ncol)
+       while (vtcol < ncol * MB_LEN_MAX)
                vp->v_text[vtcol++] = ' ';
 }
 
@@ -410,7 +329,7 @@ update(int modelinecolor)
        struct mgwin    *wp;
        struct video    *vp1;
        struct video    *vp2;
-       int      c, i, j;
+       int      c, i;
        int      hflag;
        int      currow, curcol;
        int      offs, size;
@@ -485,8 +404,9 @@ update(int modelinecolor)
                        vscreen[i]->v_color = CTEXT;
                        vscreen[i]->v_flag |= (VFCHG | VFHBAD);
                        vtmove(i, 0);
-                       for (j = 0; j < llength(lp); ++j)
-                               vtputc(lgetc(lp, j));
+                       if (llength(lp)) {
+                               vtputs(lp->l_text, llength(lp), 0);
+                       }
                        vteeol();
                } else if ((wp->w_rflag & (WFEDIT | WFFULL)) != 0) {
                        hflag = TRUE;
@@ -495,8 +415,10 @@ update(int modelinecolor)
                                vscreen[i]->v_flag |= (VFCHG | VFHBAD);
                                vtmove(i, 0);
                                if (lp != wp->w_bufp->b_headp) {
-                                       for (j = 0; j < llength(lp); ++j)
-                                               vtputc(lgetc(lp, j));
+                                       if (llength(lp)) {
+                                               vtputs(lp->l_text, llength(lp),
+                                                   0);
+                                       }
                                        lp = lforw(lp);
                                }
                                vteeol();
@@ -514,32 +436,53 @@ update(int modelinecolor)
                ++currow;
                lp = lforw(lp);
        }
+
        curcol = 0;
        i = 0;
        while (i < curwp->w_doto) {
-               c = lgetc(lp, i++);
+               char tmp[5];
+               c = lgetc(lp, i);
                if (c == '\t'
 #ifdef NOTAB
                    && !(curbp->b_flag & BFNOTAB)
 #endif
                        ) {
-                       curcol |= 0x07;
                        curcol++;
-               } else if (ISCTRL(c) != FALSE)
+                       while ((curcol - lbound) & 0x07) {
+                               curcol++;
+                       }
+               } else if (iscntrl(c) != FALSE)
                        curcol += 2;
-               else if (isprint(c))
+               else if (isprint(c)) {
                        curcol++;
-               else {
-                       char bf[5];
-
-                       snprintf(bf, sizeof(bf), "\\%o", c);
-                       curcol += strlen(bf);
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &lp->l_text[i],
+                                                 llength(lp) - i, &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       } else {
+                               memset(&mbs, 0, sizeof mbs);
+                       }
+                       if (width >= 0) {
+                               curcol += width;
+                               i += (consumed - 1);
+                       } else {
+                               snprintf(tmp, sizeof(tmp), "\\%o", c);
+                               curcol += strlen(tmp);
+                       }
+               } else {
+                       snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       curcol += strlen(tmp);
                }
+               i++;
        }
        if (curcol >= ncol - 1) {       /* extended line. */
                /* flag we are extended and changed */
                vscreen[currow]->v_flag |= VFEXT | VFCHG;
-               updext(currow, curcol); /* and output extended line */
+               curcol = updext(currow, curcol);
        } else
                lbound = 0;     /* not extended line */
 
@@ -558,8 +501,10 @@ update(int modelinecolor)
                                if ((wp != curwp) || (lp != wp->w_dotp) ||
                                    (curcol < ncol - 1)) {
                                        vtmove(i, 0);
-                                       for (j = 0; j < llength(lp); ++j)
-                                               vtputc(lgetc(lp, j));
+                                       if (llength(lp)) {
+                                               vtputs(lp->l_text, llength(lp),
+                                                   0);
+                                       }
                                        vteeol();
                                        /* this line no longer is extended */
                                        vscreen[i]->v_flag &= ~VFEXT;
@@ -661,39 +606,44 @@ ucopy(struct video *vvp, struct video *p
        pvp->v_hash = vvp->v_hash;
        pvp->v_cost = vvp->v_cost;
        pvp->v_color = vvp->v_color;
-       bcopy(vvp->v_text, pvp->v_text, ncol);
+       bcopy(vvp->v_text, pvp->v_text, ncol * MB_LEN_MAX);
 }
 
 /*
- * updext: update the extended line which the cursor is currently on at a
- * column greater than the terminal width. The line will be scrolled right or
- * left to let the user see where the cursor is.
+ * updext: update the extended line which the cursor is currently on
+ * at a column greater than the terminal width. The line will be
+ * scrolled right or left to let the user see where the cursor
+ * is. curcol may need to be adjusted, depending on how wide
+ * characters and lbound interact, that adjusted position is returned.
  */
-void
+int
 updext(int currow, int curcol)
 {
-       struct line     *lp;                    /* pointer to current line */
-       int      j;                     /* index into line */
+       struct line *lp = curwp->w_dotp;        /* pointer to current line */
+       size_t startbyte;
+       int bettercol = curcol;
+       size_t fullextent;
 
        if (ncol < 2)
-               return;
+               return curcol;
 
        /*
-        * calculate what column the left bound should be
-        * (force cursor into middle half of screen)
+        * calculate what column the left bound should be (force
+        * cursor into middle half of screen). Ensuring that it is at
+        * a tabstop allows update() to calculate curcol without
+        * wondering how tabstops are calculated before the first '$'.
         */
        lbound = curcol - (curcol % (ncol >> 1)) - (ncol >> 2);
+       lbound = (lbound | 0x07) + 1;
+       vscreen[currow]->v_text[0] = '$';
+       vtmove(currow, 1);
+       startbyte = getbyteofcol(lp, 0, lbound + 1);
+       fullextent = getbyteofcol(lp, startbyte, ncol + 1);
+       vtputs(lp->l_text + startbyte, fullextent - startbyte, 1);
+       vteeol();
 
-       /*
-        * scan through the line outputing characters to the virtual screen
-        * once we reach the left edge
-        */
-       vtmove(currow, -lbound);                /* start scanning offscreen */
-       lp = curwp->w_dotp;                     /* line to output */
-       for (j = 0; j < llength(lp); ++j)       /* until the end-of-line */
-               vtpute(lgetc(lp, j));
-       vteeol();                               /* truncate the virtual line */
-       vscreen[currow]->v_text[0] = '$';       /* and put a '$' in column 1 */
+       bettercol = lbound + getcolofbyte(lp, startbyte, 1, curwp->w_doto);
+       return (bettercol);
 }
 
 /*
@@ -708,12 +658,35 @@ updext(int currow, int curcol)
 void
 uline(int row, struct video *vvp, struct video *pvp)
 {
-       char  *cp1;
-       char  *cp2;
-       char  *cp3;
-       char  *cp4;
-       char  *cp5;
+       char  *cp1; /* Pointer to the start of dirty region */
+       char  *cp2; /* pvp's counterpart for cp1 */
+       char  *cp3; /* Pointer to end of dirty region */
+       char  *cp4; /* pvp's counterpart for cp3 */
+       char  *cp5; /* After this, within dirty region, all is ' ' */
+       char  *mbcounter;
        int    nbflag;
+       int    startcol; /* onscreen column matching cp1 */
+       char  *lastbyte; /* byte which handles last onscreen column  */
+       int    seencols = 0;
+       mbstate_t mbs = { 0 };
+       wchar_t wc = 0;
+
+       lastbyte = vvp->v_text;
+       while (seencols < ncol && *lastbyte) {
+               size_t consumed = mbrtowc(&wc, lastbyte,
+                   (vvp->v_text + ncol * MB_LEN_MAX - lastbyte), &mbs);
+               if (consumed < (size_t) -2) {
+                       lastbyte += consumed;
+                       seencols += wcwidth(wc);
+               } else {
+                       lastbyte++;
+                       seencols++;
+                       memset(&mbs, 0, sizeof mbs);
+               }
+       }
+       if (lastbyte - vvp->v_text < ncol) {
+               lastbyte = &vvp->v_text[ncol];
+       }
 
        if (vvp->v_color != pvp->v_color) {     /* Wrong color, do a     */
                ttmove(row, 0);                 /* full redraw.          */
@@ -729,11 +702,12 @@ uline(int row, struct video *vvp, struct
                 * putting the invisible glitch character on the next line.
                 * (Hazeltine executive 80 model 30)
                 */
-               cp2 = &vvp->v_text[ncol - (magic_cookie_glitch >= 0 ?
-                   (magic_cookie_glitch != 0 ? magic_cookie_glitch : 1) : 0)];
+               cp2 = lastbyte -
+                   (magic_cookie_glitch >= 0 ? (magic_cookie_glitch != 0 ?
+                   magic_cookie_glitch : 1) : 0);
 #else
                cp1 = &vvp->v_text[0];
-               cp2 = &vvp->v_text[ncol];
+               cp2 = lastbyte;
 #endif
                while (cp1 != cp2) {
                        ttputc(*cp1++);
@@ -744,21 +718,31 @@ uline(int row, struct video *vvp, struct
        }
        cp1 = &vvp->v_text[0];          /* Compute left match.   */
        cp2 = &pvp->v_text[0];
-       while (cp1 != &vvp->v_text[ncol] && cp1[0] == cp2[0]) {
+       while (cp1 != lastbyte && cp1[0] == cp2[0]) {
                ++cp1;
                ++cp2;
        }
-       if (cp1 == &vvp->v_text[ncol])  /* All equal.            */
+       if (cp1 == lastbyte)    /* All equal.            */
                return;
+       while (cp1 != vvp->v_text && !isprint(*cp1) &&
+              mbrtowc(&wc, cp1, (lastbyte - cp1), &mbs) >= (size_t) -2) {
+               --cp1;
+               --cp2;
+       }
        nbflag = FALSE;
-       cp3 = &vvp->v_text[ncol];       /* Compute right match.  */
-       cp4 = &pvp->v_text[ncol];
+       cp3 = lastbyte; /* Compute right match.  */
+       cp4 = &pvp->v_text[lastbyte - vvp->v_text];
        while (cp3[-1] == cp4[-1]) {
                --cp3;
                --cp4;
                if (cp3[0] != ' ')      /* Note non-blanks in    */
                        nbflag = TRUE;  /* the right match.      */
        }
+       while (cp3 != lastbyte && !isprint(*cp3) &&
+               mbrtowc(&wc, cp3, (lastbyte - cp3), &mbs) >= (size_t) -2) {
+               ++cp3;
+               ++cp4;
+       }
        cp5 = cp3;                      /* Is erase good?        */
        if (nbflag == FALSE && vvp->v_color == CTEXT) {
                while (cp5 != cp1 && cp5[-1] == ' ')
@@ -768,13 +752,27 @@ uline(int row, struct video *vvp, struct
                        cp5 = cp3;
        }
        /* Alcyon hack */
-       ttmove(row, (int) (cp1 - &vvp->v_text[0]));
+       startcol = 0;
+       mbcounter = vvp->v_text;
+       while ((cp1 - mbcounter) > 0) {
+               size_t consumed = mbrtowc(&wc, mbcounter, (cp1 - mbcounter),
+                                         &mbs);
+               if (consumed < (size_t) -2) {
+                       mbcounter += consumed;
+                       startcol += wcwidth(wc);
+               } else {
+                       mbcounter++;
+                       startcol++;
+                       memset(&mbs, 0, sizeof mbs);
+               }
+       }
+       ttmove(row, startcol);
 #ifdef STANDOUT_GLITCH
        if (vvp->v_color != CTEXT && magic_cookie_glitch > 0) {
                if (cp1 < &vvp->v_text[magic_cookie_glitch])
                        cp1 = &vvp->v_text[magic_cookie_glitch];
-               if (cp5 > &vvp->v_text[ncol - magic_cookie_glitch])
-                       cp5 = &vvp->v_text[ncol - magic_cookie_glitch];
+               if (cp5 > lastbyte - magic_cookie_glitch)
+                       cp5 = lastbyte - magic_cookie_glitch;
        } else if (magic_cookie_glitch < 0)
 #endif
                ttcolor(vvp->v_color);
@@ -807,46 +805,39 @@ modeline(struct mgwin *wp, int modelinec
        vscreen[n]->v_flag |= (VFCHG | VFHBAD); /* Recompute, display.   */
        vtmove(n, 0);                           /* Seek to right line.   */
        bp = wp->w_bufp;
-       vtputc('-');
-       vtputc('-');
+       n = vtputs("--", 0, 0);
        if ((bp->b_flag & BFREADONLY) != 0) {
-               vtputc('%');
+               n += vtputs("%", 0, n);
                if ((bp->b_flag & BFCHG) != 0)
-                       vtputc('*');
+                       n += vtputs("*", 0, n);
                else
-                       vtputc('%');
+                       n += vtputs("%", 0, n);
        } else if ((bp->b_flag & BFCHG) != 0) { /* "*" if changed.       */
-               vtputc('*');
-               vtputc('*');
+               n += vtputs("**", 0, n);
        } else {
-               vtputc('-');
-               vtputc('-');
+               n += vtputs("--", 0, n);
        }
-       vtputc('-');
+       n += vtputs("-", 0, n);
        n = 5;
-       n += vtputs("Mg: ");
+       n += vtputs("Mg: ", 0, n);
        if (bp->b_bname[0] != '\0')
-               n += vtputs(&(bp->b_bname[0]));
+               n += vtputs(&(bp->b_bname[0]), 0, n);
        while (n < 42) {                        /* Pad out with blanks.  */
-               vtputc(' ');
-               ++n;
+               n += vtputs(" ", 0, n);
        }
-       vtputc('(');
-       ++n;
+       n += vtputs("(", 0, n);
        for (md = 0; ; ) {
-               n += vtputs(bp->b_modes[md]->p_name);
+               n += vtputs(bp->b_modes[md]->p_name, 0, n);
                if (++md > bp->b_nmodes)
                        break;
-               vtputc('-');
-               ++n;
+               n += vtputs("-", 0, n);
        }
        /* XXX These should eventually move to a real mode */
        if (macrodef == TRUE)
-               n += vtputs("-def");
+               n += vtputs("-def", 0, n);
        if (globalwd == TRUE)
-               n += vtputs("-gwd");
-       vtputc(')');
-       ++n;
+               n += vtputs("-gwd", 0, n);
+       n += vtputs(")", 0, n);
 
        if (linenos && colnos)
                len = snprintf(sl, sizeof(sl), "--L%d--C%d", wp->w_dotline,
@@ -856,27 +847,132 @@ modeline(struct mgwin *wp, int modelinec
        else if (colnos)
                len = snprintf(sl, sizeof(sl), "--C%d", getcolpos(wp));
        if ((linenos || colnos) && len < sizeof(sl) && len != -1)
-               n += vtputs(sl);
+               n += vtputs(sl, 0, n);
 
        while (n < ncol) {                      /* Pad out.              */
-               vtputc('-');
-               ++n;
+               n += vtputs("-", 0, n);
        }
 }
 
 /*
- * Output a string to the mode line, report how long it was.
+ * Output a string to the mode line, report how long it was,
+ * dealing with long lines and the display of unprintable
+ * things like control characters. Also expand tabs every 8
+ * columns. This code only puts printing characters into
+ * the virtual display image. Special care must be taken when
+ * expanding tabs. On a screen whose width is not a multiple
+ * of 8, it is possible for the virtual cursor to hit the
+ * right margin before the next tab stop is reached. This
+ * makes the tab code loop if you are not careful.
+ * Three guesses how we found this.
  */
 int
-vtputs(const char *s)
+vtputs(const char *s, const size_t max_bytes, const size_t initial_col)
 {
-       int n = 0;
+       const unsigned char *us = (const unsigned char *) s;
+       struct video *vp = vscreen[vtrow];
+       size_t bytes_handled = 0;
+       size_t last_full_byte_start = vtcol;
+       size_t space_printed = 0;
+
+       if (!s) {
+               return (0);
+       }
+
+       while (*us && (!max_bytes || bytes_handled < max_bytes)) {
+               if (space_printed + initial_col >= ncol) {
+                       break;
+               } else if (*us == '\t'
+#ifdef NOTAB
+                          && !(curbp->b_flag & BFNOTAB)
+#endif
+                          ) {
+                       last_full_byte_start = vtcol;
+                       do {
+                               if (vtcol >= 0) {
+                                       last_full_byte_start = vtcol;
+                                       vp->v_text[vtcol] = ' ';
+                               }
+                               vtcol++;
+                               space_printed++;
+                       } while (space_printed + initial_col < ncol &&
+                                ((space_printed + initial_col) & 0x07));
+                       us++;
+                       bytes_handled++;
+               } else if (iscntrl(*us)) {
+                       last_full_byte_start = vtcol;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = '^';
+                       }
+                       vtcol++;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = CCHR(*us);
+                       }
+                       vtcol++;
+                       bytes_handled++;
+                       space_printed += 2;
+                       us++;
+               } else if (isprint(*us)) {
+                       last_full_byte_start = vtcol;
+                       if (vtcol >= 0) {
+                               vp->v_text[vtcol] = *us++;
+                       }
+                       vtcol++;
+                       bytes_handled++;
+                       space_printed++;
+               } else if (!(curbp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumable = max_bytes ?
+                               (max_bytes - bytes_handled) : -1;
+                       size_t consumed = mbrtowc(&wc, (const char *)us,
+                                                 consumable, &mbs);
+                       int width = -1;
+                       last_full_byte_start = vtcol;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               bytes_handled += consumed;
+                               space_printed += width;
+                               do {
+                                       if (vtcol >= 0) {
+                                               vp->v_text[vtcol] = *us++;
+                                       }
+                                       vtcol++;
+                               } while (--consumed);
+                       } else {
+                               char bf[5];
+                               snprintf(bf, sizeof(bf), "\\%o", *us);
+                               bytes_handled++;
+                               space_printed += vtputs(bf, 0,
+                                   space_printed + initial_col);
+                               us++;
+                       }
+               } else {
+                       char bf[5];
+                       last_full_byte_start = vtcol;
+                       snprintf(bf, sizeof(bf), "\\%o", *us);
+                       bytes_handled++;
+                       space_printed += vtputs(bf, 0,
+                           space_printed + initial_col);
+                       us++;
+               }
+       }
 
-       while (*s != '\0') {
-               vtputc(*s++);
-               ++n;
+       if ((space_printed + initial_col > ncol) ||
+           (space_printed + initial_col == ncol &&
+           (*us && (!max_bytes || bytes_handled < max_bytes)))) {
+               vp->v_text[last_full_byte_start] = '$';
+               while (++last_full_byte_start <= vtcol) {
+                       vp->v_text[last_full_byte_start] = ' ';
+               }
+               bytes_handled++;
+               space_printed++;
+               us++;
        }
-       return (n);
+
+       return (space_printed);
 }
 
 /*
@@ -894,11 +990,11 @@ hash(struct video *vp)
        char   *s;
 
        if ((vp->v_flag & VFHBAD) != 0) {       /* Hash bad.             */
-               s = &vp->v_text[ncol - 1];
-               for (i = ncol; i != 0; --i, --s)
+               s = &vp->v_text[ncol * MB_LEN_MAX - 1];
+               for (i = ncol * MB_LEN_MAX; i != 0; --i, --s)
                        if (*s != ' ')
                                break;
-               n = ncol - i;                   /* Erase cheaper?        */
+               n = ncol * MB_LEN_MAX - i;                      /* Erase cheaper?        */
                if (n > tceeol)
                        n = tceeol;
                vp->v_cost = i + n;             /* Bytes + blanks.       */
Index: echo.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/echo.c,v
retrieving revision 1.66
diff -u -p -u -p -r1.66 echo.c
--- echo.c      24 Oct 2016 17:18:42 -0000      1.66
+++ echo.c      30 May 2018 13:34:01 -0000
@@ -9,6 +9,7 @@
  * of the display screen. Used by the entire known universe.
  */
 
+#include <ctype.h>
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdarg.h>
@@ -187,6 +188,7 @@ static char *
 veread(const char *fp, char *buf, size_t nbuf, int flag, va_list ap)
 {
        int      dynbuf = (buf == NULL);
+       size_t   buf_len;
        int      cpos, epos;            /* cursor, end position in buf */
        int      c, i, y;
        int      cplflag;               /* display completion list */
@@ -215,6 +217,11 @@ veread(const char *fp, char *buf, size_t
        epos = cpos = 0;
        ml = mr = esc = 0;
        cplflag = FALSE;
+       if (buf == NULL) {
+               buf_len = 0;
+       } else {
+               buf_len = strlen(buf);
+       }
 
        if ((flag & EFNEW) != 0 || ttrow != nrow - 1) {
                ttcolor(CTEXT);
@@ -227,7 +234,7 @@ veread(const char *fp, char *buf, size_t
                if (buf == NULL)
                        return (NULL);
                eputs(buf);
-               epos = cpos += strlen(buf);
+               epos = cpos += buf_len;
        }
        tteeol();
        ttflush();
@@ -275,7 +282,7 @@ veread(const char *fp, char *buf, size_t
                switch (c) {
                case CCHR('A'): /* start of line */
                        while (cpos > 0) {
-                               if (ISCTRL(buf[--cpos]) != FALSE) {
+                               if (iscntrl(buf[--cpos]) != FALSE) {
                                        ttputc('\b');
                                        --ttcol;
                                }
@@ -306,7 +313,7 @@ veread(const char *fp, char *buf, size_t
                        break;
                case CCHR('B'): /* back */
                        if (cpos > 0) {
-                               if (ISCTRL(buf[--cpos]) != FALSE) {
+                               if (iscntrl(buf[--cpos]) != FALSE) {
                                        ttputc('\b');
                                        --ttcol;
                                }
@@ -411,7 +418,7 @@ veread(const char *fp, char *buf, size_t
                                epos--;
                                ttputc('\b');
                                ttcol--;
-                               if (ISCTRL(y) != FALSE) {
+                               if (iscntrl(y) != FALSE) {
                                        ttputc('\b');
                                        ttcol--;
                                }
@@ -422,7 +429,7 @@ veread(const char *fp, char *buf, size_t
                                        eputc(buf[i]);
                                }
                                ttputc(' ');
-                               if (ISCTRL(y) != FALSE) {
+                               if (iscntrl(y) != FALSE) {
                                        ttputc(' ');
                                        ttputc('\b');
                                }
@@ -438,7 +445,7 @@ veread(const char *fp, char *buf, size_t
                                ttputc(' ');
                                ttputc('\b');
                                --ttcol;
-                               if (ISCTRL(buf[--cpos]) != FALSE) {
+                               if (iscntrl(buf[--cpos]) != FALSE) {
                                        ttputc('\b');
                                        ttputc(' ');
                                        ttputc('\b');
@@ -449,12 +456,12 @@ veread(const char *fp, char *buf, size_t
                        ttflush();
                        break;
                case CCHR('W'):                 /* kill to beginning of word */
-                       while ((cpos > 0) && !ISWORD(buf[cpos - 1])) {
+                       while ((cpos > 0) && !byteinword(buf, cpos - 1, buf_len - cpos + 1)) {
                                ttputc('\b');
                                ttputc(' ');
                                ttputc('\b');
                                --ttcol;
-                               if (ISCTRL(buf[--cpos]) != FALSE) {
+                               if (iscntrl(buf[--cpos]) != FALSE) {
                                        ttputc('\b');
                                        ttputc(' ');
                                        ttputc('\b');
@@ -462,12 +469,12 @@ veread(const char *fp, char *buf, size_t
                                }
                                epos--;
                        }
-                       while ((cpos > 0) && ISWORD(buf[cpos - 1])) {
+                       while ((cpos > 0) && byteinword(buf, cpos - 1, buf_len - cpos + 1)) {
                                ttputc('\b');
                                ttputc(' ');
                                ttputc('\b');
                                --ttcol;
-                               if (ISCTRL(buf[--cpos]) != FALSE) {
+                               if (iscntrl(buf[--cpos]) != FALSE) {
                                        ttputc('\b');
                                        ttputc(' ');
                                        ttputc('\b');
@@ -797,6 +804,7 @@ int
 getxtra(struct list *lp1, struct list *lp2, int cpos, int wflag)
 {
        int     i;
+       size_t  name_len = strlen(lp1->l_name);
 
        i = cpos;
        for (;;) {
@@ -805,7 +813,7 @@ getxtra(struct list *lp1, struct list *l
                if (lp1->l_name[i] == '\0')
                        break;
                ++i;
-               if (wflag && !ISWORD(lp1->l_name[i - 1]))
+               if (wflag && !byteinword(lp1->l_name, i - 1, name_len - i + 1))
                        break;
        }
        return (i - cpos);
@@ -844,9 +852,11 @@ ewprintf(const char *fmt, ...)
  * %k prints the name of the current key (and takes no arguments).
  * %d prints a decimal integer
  * %o prints an octal integer
+ * %x prints a hexadecimal integer
  * %p prints a pointer
  * %s prints a string
  * %ld prints a long word
+ * %lx prints a hexadecimal long word
  * Anything else is echoed verbatim
  */
 static void
@@ -885,6 +895,10 @@ eformat(const char *fp, va_list ap)
                                eputi(va_arg(ap, int), 8);
                                break;
 
+                       case 'x':
+                               eputi(va_arg(ap, int), 16);
+                               break;
+
                        case 'p':
                                snprintf(tmp, sizeof(tmp), "%p",
                                    va_arg(ap, void *));
@@ -902,6 +916,9 @@ eformat(const char *fp, va_list ap)
                                case 'd':
                                        eputl(va_arg(ap, long), 10);
                                        break;
+                               case 'x':
+                                       eputl(va_arg(ap, long), 16);
+                                       break;
                                default:
                                        eputc(c);
                                        break;
@@ -939,6 +956,7 @@ static void
 eputl(long l, int r)
 {
        long     q;
+       int      c;
 
        if (l < 0) {
                eputc('-');
@@ -946,7 +964,10 @@ eputl(long l, int r)
        }
        if ((q = l / r) != 0)
                eputl(q, r);
-       eputc((int)(l % r) + '0');
+       c = (int)(l % r) + '0';
+       if (c > '9')
+               c += 'a' - '9' - 1;
+       eputc(c);
 }
 
 /*
@@ -969,7 +990,7 @@ static void
 eputc(char c)
 {
        if (ttcol + 2 < ncol) {
-               if (ISCTRL(c)) {
+               if (iscntrl(c)) {
                        eputc('^');
                        c = CCHR(c);
                }
Index: extend.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/extend.c,v
retrieving revision 1.64
diff -u -p -u -p -r1.64 extend.c
--- extend.c    1 Sep 2016 21:06:09 -0000       1.64
+++ extend.c    30 May 2018 13:34:01 -0000
@@ -15,7 +15,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "chrdef.h"
 #include "def.h"
 #include "funmap.h"
 #include "kbd.h"
@@ -349,7 +348,7 @@ dobind(KEYMAP *curmap, const char *p, in
        }
        if (inmacro) {
                for (s = 0; s < maclcur->l_used - 1; s++) {
-                       if (doscan(curmap, c = CHARMASK(maclcur->l_text[s]), &curmap)
+                       if (doscan(curmap, c = (unsigned char)(maclcur->l_text[s]), &curmap)
                            != NULL) {
                                if (remap(curmap, c, NULL, NULL)
                                    != TRUE)
@@ -726,7 +725,7 @@ excline(char *line)
        if (*line != '\0') {
                *line++ = '\0';
                line = skipwhite(line);
-               if (ISDIGIT(*line) || *line == '-') {
+               if (isdigit(*line) || *line == '-') {
                        argp = line;
                        line = parsetoken(line);
                }
@@ -815,9 +814,9 @@ excline(char *line)
                                                 * split into two statements
                                                 * due to bug in OSK cpp
                                                 */
-                                               c = CHARMASK(*++argp);
-                                               c = ISLOWER(c) ?
-                                                   CCHR(TOUPPER(c)) : CCHR(c);
+                                               c = (unsigned char)(*++argp);
+                                               c = islower(c) ?
+                                                   CCHR(toupper(c)) : CCHR(c);
                                                break;
                                        case '0':
                                        case '1':
@@ -843,14 +842,14 @@ excline(char *line)
                                        case 'f':
                                        case 'F':
                                                c = *++argp - '0';
-                                               if (ISDIGIT(argp[1])) {
+                                               if (isdigit(argp[1])) {
                                                        c *= 10;
                                                        c += *++argp - '0';
                                                }
-                                               c += KFIRST;
+                                               c += 256;
                                                break;
                                        default:
-                                               c = CHARMASK(*argp);
+                                               c = (unsigned char)(*argp);
                                                break;
                                        }
                                        argp++;
Index: funmap.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/funmap.c,v
retrieving revision 1.53
diff -u -p -u -p -r1.53 funmap.c
--- funmap.c    14 Apr 2016 17:05:32 -0000      1.53
+++ funmap.c    30 May 2018 13:34:01 -0000
@@ -114,6 +114,7 @@ static struct funmap functnames[] = {
        {bufferinsert, "insert-buffer",},
        {fileinsert, "insert-file",},
        {fillword, "insert-with-wrap",},
+       {insert_char, "insert-char",},
        {backisearch, "isearch-backward",},
        {forwisearch, "isearch-forward",},
        {joinline, "join-line",},
@@ -191,6 +192,7 @@ static struct funmap functnames[] = {
        {shellcommand, "shell-command",},
        {piperegion, "shell-command-on-region",},
        {shrinkwind, "shrink-window",},
+       {show_raw_mode, "show-raw-mode",},
 #ifdef NOTAB
        {space_to_tabstop, "space-to-tabstop",},
 #endif /* NOTAB */
Index: help.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/help.c,v
retrieving revision 1.35
diff -u -p -u -p -r1.35 help.c
--- help.c      19 Mar 2015 21:22:15 -0000      1.35
+++ help.c      30 May 2018 13:34:01 -0000
@@ -6,6 +6,7 @@
  * Help functions for Mg 2
  */
 
+#include <ctype.h>
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdio.h>
@@ -57,9 +58,9 @@ desckey(int f, int n)
                }
                if (funct != rescan)
                        break;
-               if (ISUPPER(key.k_chars[key.k_count - 1])) {
+               if (isupper(key.k_chars[key.k_count - 1])) {
                        funct = doscan(curmap,
-                           TOLOWER(key.k_chars[key.k_count - 1]), &curmap);
+                           tolower(key.k_chars[key.k_count - 1]), &curmap);
                        if (funct == NULL) {
                                *pep++ = '-';
                                *pep = '\0';
Index: kbd.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/kbd.c,v
retrieving revision 1.30
diff -u -p -u -p -r1.30 kbd.c
--- kbd.c       26 Sep 2015 21:51:58 -0000      1.30
+++ kbd.c       30 May 2018 13:34:01 -0000
@@ -6,9 +6,13 @@
  *     Terminal independent keyboard handling.
  */
 
+#include <ctype.h>
+#include <limits.h>
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 #include "kbd.h"
@@ -168,8 +172,8 @@ rescan(int f, int n)
        int      md = curbp->b_nmodes;
 
        for (;;) {
-               if (ISUPPER(key.k_chars[key.k_count - 1])) {
-                       c = TOLOWER(key.k_chars[key.k_count - 1]);
+               if (isupper(key.k_chars[key.k_count - 1])) {
+                       c = tolower(key.k_chars[key.k_count - 1]);
                        curmap = curbp->b_modes[md]->p_map;
                        for (i = 0; i < key.k_count - 1; i++) {
                                if ((fp = doscan(curmap, (key.k_chars[i]),
@@ -403,6 +407,43 @@ quote(int f, int n)
                        ungetkey(c);
        }
        return (selfinsert(f, n));
+}
+
+/*
+ * Prompt for a codepoint in whatever the native system's encoding is,
+ * insert it into the file
+ */
+int
+insert_char(int f, int n)
+{
+       char *bufp, *endp;
+       char inpbuf[32];
+       unsigned long cp;
+       char mb[MB_LEN_MAX + 1];
+       mbstate_t mbs = { 0 };
+       size_t mbslen, i;
+
+       if ((bufp = eread("Insert character (hex): ", inpbuf, sizeof inpbuf,
+            EFNEW)) == NULL) {
+               return (ABORT);
+       } else if (bufp[0] == '\0') {
+               return (FALSE);
+       }
+
+       /* Reject non-hex input and values that cannot fit in a wchar_t. */
+       cp = strtoul(bufp, &endp, 16);
+       if (endp == bufp || *endp != '\0' || cp > (unsigned long)WCHAR_MAX)
+               return (FALSE);
+       mbslen = wcrtomb(mb, (wchar_t)cp, &mbs);
+       if (mbslen == (size_t) -1)
+               return (FALSE);
+
+       for (i = 0; i < mbslen; ++i) {
+               if (linsert(1, (unsigned char)mb[i]) == FALSE)
+                       return (FALSE);
+       }
+
+       return (TRUE);
 }
 
 /*
Index: keymap.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/keymap.c,v
retrieving revision 1.58
diff -u -p -u -p -r1.58 keymap.c
--- keymap.c    29 Dec 2015 19:44:32 -0000      1.58
+++ keymap.c    30 May 2018 13:34:01 -0000
@@ -120,6 +120,21 @@ static struct KEYMAPE (2) cX4map = {
        }
 };
 
+static PF cX8J[] = {
+       insert_char             /* ^M */
+};
+
+static struct KEYMAPE (1) cX8map = {
+       1,
+       1,
+       rescan,
+       {
+               {
+                       CCHR('M'), CCHR('M'), cX8J, NULL
+               }
+       }
+};
+
 static PF cXcB[] = {
        listbuffers,            /* ^B */
        quit,                   /* ^C */
@@ -158,6 +173,10 @@ static PF cX0[] = {
        NULL                    /* 4 */
 };
 
+static PF cX8[] = {
+       NULL                    /* 8 */
+};
+
 static PF cXeq[] = {
        showcpos                /* = */
 };
@@ -189,9 +208,9 @@ static PF cXcar[] = {
        undo                    /* u */
 };
 
-struct KEYMAPE (6) cXmap = {
-       6,
-       6,
+struct KEYMAPE (7) cXmap = {
+       7,
+       7,
        rescan,
        {
                {
@@ -207,6 +226,9 @@ struct KEYMAPE (6) cXmap = {
                        '0', '4', cX0, (KEYMAP *) & cX4map
                },
                {
+                       '8', '8', cX8, (KEYMAP *) & cX8map
+               },
+               {
                        '=', '=', cXeq, NULL
                },
                {
@@ -491,6 +513,18 @@ static struct KEYMAPE (1) overwmap = {
        }
 };
 
+static struct KEYMAPE (1) rawmap = {
+       0,
+       1,              /* 1 to avoid 0 sized array */
+       rescan,
+       {
+               /* unused dummy entry for VMS C */
+               {
+                       (KCHAR)0, (KCHAR)0, NULL, NULL
+               }
+       }
+};
+
 
 /*
  * The basic (root) keyboard map
@@ -513,6 +547,7 @@ static struct maps_s map_table[] = {
        {(KEYMAP *) &notabmap, "notab",},
 #endif /* NOTAB */
        {(KEYMAP *) &overwmap, "overwrite",},
+       {(KEYMAP *) &rawmap, "raw",},
        {(KEYMAP *) &metamap, "esc prefix",},
        {(KEYMAP *) &cXmap, "c-x prefix",},
        {(KEYMAP *) &cX4map, "c-x 4 prefix",},
Index: line.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/line.c,v
retrieving revision 1.59
diff -u -p -u -p -r1.59 line.c
--- line.c      9 Sep 2017 13:10:28 -0000       1.59
+++ line.c      30 May 2018 13:34:01 -0000
@@ -23,6 +23,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
 
 #include "def.h"
 
@@ -327,15 +328,15 @@ lnewline(void)
 }
 
 /*
- * This function deletes "n" bytes, starting at dot. (actually, n+1, as the
- * newline is included) It understands how to deal with end of lines, etc.
- * It returns TRUE if all of the characters were deleted, and FALSE if
- * they were not (because dot ran into the end of the buffer).
- * The "kflag" indicates either no insertion, or direction  of insertion
- * into the kill buffer.
+ * This function deletes "n" bytes, starting at dot. (actually,
+ * n+1, as the newline is included) It understands how to deal with
+ * end of lines, etc.  It returns TRUE if all of the characters
+ * were deleted, and FALSE if they were not (because dot ran into
+ * the end of the buffer).  The "kflag" indicates either no insertion,
+ * or direction  of insertion into the kill buffer.
  */
 int
-ldelete(RSIZE n, int kflag)
+ldeletebyte(RSIZE n, int kflag)
 {
        struct line     *dotp;
        RSIZE            chunk;
@@ -355,6 +356,7 @@ ldelete(RSIZE n, int kflag)
                ewprintf("Buffer is read only");
                goto out;
        }
+
        len = n;
        if ((sv = calloc(1, len + 1)) == NULL)
                goto out;
@@ -417,6 +419,50 @@ out:
 }
 
 /*
+ * This function is like ldeletebyte, but it deletes "n" characters.
+ */
+int
+ldeletechar(RSIZE n, int kflag)
+{
+       struct line     *dotp;
+       int              doto;
+
+       /*
+        * If we're in a wide context, the caller meant n *characters*,
+        * but we prefer to work in bytes, so let's expand out n.
+        */
+       if (!(curbp->b_flag & BFSHOWRAW)) {
+               size_t new_n = 0;
+               mbstate_t mbs = { 0 };
+               wchar_t wc = 0;
+               size_t consumed = 0;
+               dotp = curwp->w_dotp;
+               doto = curwp->w_doto;
+               for (size_t j = 0; j < n; ++j) {
+                       if (doto == llength(dotp)) {
+                               doto = 0;
+                               dotp = lforw(dotp);
+                               new_n++;
+                       } else {
+                               size_t len = llength(dotp) - doto;
+                               consumed = mbrtowc(&wc,
+                                                  &dotp->l_text[doto],
+                                                  len, &mbs);
+                               /* NUL (0) and bad sequences count as 1 byte */
+                               if (consumed == 0 || consumed >= (size_t) -2) {
+                                       mbs = (mbstate_t) { 0 };
+                                       consumed = 1;
+                               }
+                               new_n += consumed;
+                               doto += consumed;
+                       }
+               }
+               return ldeletebyte(new_n, kflag);
+       }
+       return ldeletebyte(n, kflag);
+}
+
+/*
  * Delete a newline and join the current line with the next line. If the next
  * line is the magic header line always return TRUE; merging the last line
  * with the header line can be thought of as always being a successful
@@ -523,7 +569,7 @@ lreplace(RSIZE plen, char *st)
        undo_boundary_enable(FFRAND, 0);
 
        (void)backchar(FFARG | FFRAND, (int)plen);
-       (void)ldelete(plen, KNONE);
+       (void)ldeletechar(plen, KNONE);
 
        rlen = strlen(st);
        region_put_data(st, rlen);
Index: match.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/match.c,v
retrieving revision 1.19
diff -u -p -u -p -r1.19 match.c
--- match.c     3 Jun 2015 23:40:01 -0000       1.19
+++ match.c     30 May 2018 13:34:01 -0000
@@ -10,6 +10,7 @@
  * but there's enough overhead in the editor as it is.
  */
 
+#include <ctype.h>
 #include <sys/queue.h>
 #include <signal.h>
 #include <stdio.h>
@@ -172,7 +173,7 @@ displaymatch(struct line *clp, int cbo)
                            || (curbp->b_flag & BFNOTAB)
 #endif
                                )
-                               if (ISCTRL(c)) {
+                               if (iscntrl(c)) {
                                        buf[bufo++] = '^';
                                        buf[bufo++] = CCHR(c);
                                } else
Index: mg.1
===================================================================
RCS file: /cvs/src/usr.bin/mg/mg.1,v
retrieving revision 1.106
diff -u -p -u -p -r1.106 mg.1
--- mg.1        11 Dec 2017 07:27:07 -0000      1.106
+++ mg.1        30 May 2018 13:34:01 -0000
@@ -1,7 +1,7 @@
 .\"    $OpenBSD: mg.1,v 1.106 2017/12/11 07:27:07 jmc Exp $
 .\" This file is in the public domain.
 .\"
-.Dd $Mdocdate: December 11 2017 $
+.Dd $Mdocdate: May 28 2018 $
 .Dt MG 1
 .Os
 .Sh NAME
@@ -849,7 +849,7 @@ This is the default.
 .It set-default-mode
 Append the supplied mode to the list of default modes
 used by subsequent buffer creation.
-Built in modes include: fill, indent and overwrite.
+Built in modes include: fill, indent, overwrite, and raw.
 .It set-fill-column
 Prompt the user for a fill column.
 Used by auto-fill-mode.
@@ -861,6 +861,8 @@ Sets the prefix string to be used by the
 Execute external command from mini-buffer.
 .It shell-command-on-region
 Provide the text in region to the shell command as input.
+.It show-raw-mode
+Toggle raw mode, in which each byte is treated as a separate character.
 .It shrink-window
 Shrink current window by one line.
 The window immediately below is expanded to pick up the slack.
@@ -1091,5 +1093,3 @@ In order to use 8-bit characters (such a
 needs to be disabled via the
 .Dq meta-key-mode
 command.
-.Pp
-Multi-byte character sets, such as UTF-8, are not supported.
Index: modes.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/modes.c,v
retrieving revision 1.21
diff -u -p -u -p -r1.21 modes.c
--- modes.c     30 May 2017 07:05:22 -0000      1.21
+++ modes.c     30 May 2018 13:34:01 -0000
@@ -111,6 +111,23 @@ overwrite_mode(int f, int n)
 }
 
 int
+show_raw_mode(int f, int n)
+{
+       if (changemode(f, n, "raw") == FALSE)
+               return (FALSE);
+       if (f & FFARG) {
+               if (n <= 0)
+                       curbp->b_flag &= ~BFSHOWRAW;
+               else
+                       curbp->b_flag |= BFSHOWRAW;
+       } else
+               curbp->b_flag ^= BFSHOWRAW;
+
+       sgarbf = TRUE;
+       return (TRUE);
+}
+
+int
 set_default_mode(int f, int n)
 {
        int      i;
@@ -170,5 +187,11 @@ set_default_mode(int f, int n)
                        defb_flag |= BFNOTAB;
        }
 #endif /* NOTAB */
+       if (strcmp(modebuf, "raw") == 0) {
+               if (n <= 0)
+                       defb_flag &= ~BFSHOWRAW;
+               else
+                       defb_flag |= BFSHOWRAW;
+       }
        return (TRUE);
 }
Index: paragraph.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/paragraph.c,v
retrieving revision 1.45
diff -u -p -u -p -r1.45 paragraph.c
--- paragraph.c 6 Sep 2016 16:25:47 -0000       1.45
+++ paragraph.c 30 May 2018 13:34:01 -0000
@@ -24,6 +24,16 @@ static int   findpara(void);
 static int     do_gotoeop(int, int, int *);
 
 /*
+ * Returns non-zero iff c is "end of sentence punctuation". Used
+ * only for determining whether to double-space sentences.
+ */
+static int
+iseosp(int c)
+{
+       return (c == '?' || c == '!' || c == '.');
+}
+
+/*
  * Move to start of paragraph.
  * Move backwards by line, checking from the 1st character forwards for the
  * existence a non-space. If a non-space character is found, move to the 
@@ -179,7 +189,7 @@ fillpara(int f, int n)
                        c = lgetc(curwp->w_dotp, curwp->w_doto);
 
                /* and then delete it */
-               if (ldelete((RSIZE) 1, KNONE) == FALSE && !eopflag) {
+               if (ldeletechar((RSIZE) 1, KNONE) == FALSE && !eopflag) {
                        retval = FALSE;
                        goto cleanup;
                }
@@ -207,13 +217,16 @@ fillpara(int f, int n)
                         * character was one of '.','?','!' doublespace here.
                         * behave the same way if a ')' is preceded by a
                         * [.?!] and followed by a doublespace.
+                        *
+                        * These rules are too complicated for multibyte
+                        * characters, so they just get single-spaced.
                         */
                        if (dblspace && (!eopflag && ((eolflag ||
                            curwp->w_doto == llength(curwp->w_dotp) ||
                            (c = lgetc(curwp->w_dotp, curwp->w_doto)) == ' '
-                           || c == '\t') && (ISEOSP(wbuf[wordlen - 1]) ||
+                           || c == '\t') && (iseosp(wbuf[wordlen - 1]) ||
                            (wbuf[wordlen - 1] == ')' && wordlen >= 2 &&
-                           ISEOSP(wbuf[wordlen - 2])))) &&
+                           iseosp(wbuf[wordlen - 2])))) &&
                            wordlen < MAXWORD - 1))
                                wbuf[wordlen++] = ' ';
 
@@ -229,7 +242,7 @@ fillpara(int f, int n)
                                if (curwp->w_doto > 0 &&
                                    lgetc(curwp->w_dotp, curwp->w_doto - 1) == ' ') {
                                        curwp->w_doto -= 1;
-                                       (void)ldelete((RSIZE) 1, KNONE);
+                                       (void)ldeletechar((RSIZE) 1, KNONE);
                                }
                                /* start a new line */
                                (void)lnewline();
@@ -421,7 +434,7 @@ fillword(int f, int n)
 #endif
                        )
                        col |= 0x07;
-               else if (ISCTRL(c) != FALSE)
+               else if (iscntrl(c) != FALSE)
                        ++col;
        }
        if (curwp->w_doto != llength(curwp->w_dotp)) {
Index: re_search.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/re_search.c,v
retrieving revision 1.33
diff -u -p -u -p -r1.33 re_search.c
--- re_search.c 6 Aug 2017 04:39:45 -0000       1.33
+++ re_search.c 30 May 2018 13:34:01 -0000
@@ -546,7 +546,7 @@ killmatches(int cond)
                        curwp->w_doto = 0;
                        curwp->w_dotp = clp;
                        count++;
-                       s = ldelete(llength(clp) + 1, KNONE);
+                       s = ldeletebyte(llength(clp) + 1, KNONE);
                        clp = curwp->w_dotp;
                        curwp->w_rflag |= WFMOVE;
                        if (s == FALSE)
Index: region.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/region.c,v
retrieving revision 1.37
diff -u -p -u -p -r1.37 region.c
--- region.c    9 Sep 2016 06:05:51 -0000       1.37
+++ region.c    30 May 2018 13:34:01 -0000
@@ -15,12 +15,15 @@
 #include <sys/wait.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <limits.h>
 #include <poll.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "def.h"
 
@@ -55,7 +58,7 @@ killregion(int f, int n)
        curwp->w_dotp = region.r_linep;
        curwp->w_doto = region.r_offset;
        curwp->w_dotline = region.r_lineno;
-       s = ldelete(region.r_size, KFORW | KREG);
+       s = ldeletebyte(region.r_size, KFORW | KREG);
        clearmark(FFARG, 0);
 
        return (s);
@@ -118,7 +121,13 @@ lowerregion(int f, int n)
 {
        struct line     *linep;
        struct region    region;
-       int      loffs, c, s;
+       int              loffs, s;
+       char             t[MB_LEN_MAX + 1];
+       mbstate_t        mbs = { 0 };
+       wchar_t          wc = 0;
+       wchar_t          wcl = 0;
+       size_t           consumed = 0;
+       size_t           mbslen = 0;
 
        if ((s = checkdirty(curbp)) != TRUE)
                return (s);
@@ -141,10 +150,26 @@ lowerregion(int f, int n)
                        linep = lforw(linep);
                        loffs = 0;
                } else {
-                       c = lgetc(linep, loffs);
-                       if (ISUPPER(c) != FALSE)
-                               lputc(linep, loffs, TOLOWER(c));
-                       ++loffs;
+                       consumed = mbrtowc(&wc, &linep->l_text[loffs],
+                                          llength(linep) - loffs, &mbs);
+                       if (consumed < (size_t) -2) {
+                               wcl = towlower(wc);
+                               mbslen = wcrtomb(t, wcl, &mbs);
+                               if (mbslen == consumed) {
+                                       for (size_t k = 0; k < consumed; ++k) {
+                                               lputc(linep, loffs, t[k]);
+                                               loffs++;
+                                       }
+                                       if (consumed > 0) {
+                                               region.r_size -= (consumed - 1);
+                                       }
+                               } else {
+                                       loffs++;
+                               }
+                       } else {
+                               mbs = (mbstate_t) { 0 };
+                               loffs++;
+                       }
                }
        }
        return (TRUE);
@@ -160,9 +185,15 @@ lowerregion(int f, int n)
 int
 upperregion(int f, int n)
 {
-       struct line      *linep;
-       struct region     region;
-       int       loffs, c, s;
+       struct line     *linep;
+       struct region    region;
+       int              loffs, s;
+       char             t[MB_LEN_MAX + 1];
+       mbstate_t        mbs = { 0 };
+       wchar_t          wc = 0;
+       wchar_t          wcu = 0;
+       size_t           consumed = 0;
+       size_t           mbslen = 0;
 
        if ((s = checkdirty(curbp)) != TRUE)
                return (s);
@@ -171,6 +202,7 @@ upperregion(int f, int n)
                ewprintf("Buffer is read-only");
                return (FALSE);
        }
+
        if ((s = getregion(&region)) != TRUE)
                return (s);
 
@@ -184,10 +216,26 @@ upperregion(int f, int n)
                        linep = lforw(linep);
                        loffs = 0;
                } else {
-                       c = lgetc(linep, loffs);
-                       if (ISLOWER(c) != FALSE)
-                               lputc(linep, loffs, TOUPPER(c));
-                       ++loffs;
+                       consumed = mbrtowc(&wc, &linep->l_text[loffs],
+                                          llength(linep) - loffs, &mbs);
+                       if (consumed < (size_t) -2) {
+                               wcu = towupper(wc);
+                               mbslen = wcrtomb(t, wcu, &mbs);
+                               if (mbslen == consumed) {
+                                       for (size_t k = 0; k < consumed; ++k) {
+                                               lputc(linep, loffs, t[k]);
+                                               loffs++;
+                                       }
+                                       if (consumed > 0) {
+                                               region.r_size -= (consumed - 1);
+                                       }
+                               } else {
+                                       loffs++;
+                               }
+                       } else {
+                               mbs = (mbstate_t) { 0 };
+                               loffs++;
+                       }
                }
        }
        return (TRUE);
Index: search.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/search.c,v
retrieving revision 1.46
diff -u -p -u -p -r1.46 search.c
--- search.c    6 Aug 2017 04:39:45 -0000       1.46
+++ search.c    30 May 2018 13:34:01 -0000
@@ -322,7 +322,7 @@ isearch(int dir)
                        /* if the search is case insensitive, add to pattern using lowercase */
                        xcase = 0;
                        for (i = 0; pat[i]; i++)
-                               if (ISUPPER(CHARMASK(pat[i])))
+                               if (isupper((unsigned char)pat[i]))
                                        xcase = 1;
 
                        while (cbo < llength(clp)) {
@@ -335,8 +335,8 @@ isearch(int dir)
                                        break;
                                }
                                firstc = 0;
-                               if (!xcase && ISUPPER(c))
-                                       c = TOLOWER(c);
+                               if (!xcase && isupper(c))
+                                       c = tolower(c);
 
                                pat[pptr++] = c;
                                pat[pptr] = '\0';
@@ -364,7 +364,7 @@ isearch(int dir)
                        c = CCHR('J');
                        goto addchar;
                default:
-                       if (ISCTRL(c)) {
+                       if (iscntrl(c)) {
                                ungetkey(c);
                                curwp->w_markp = clp;
                                curwp->w_marko = cbo;
@@ -694,7 +694,7 @@ forwsrch(void)
        cbo = curwp->w_doto;
        nline = curwp->w_dotline;
        for (i = 0; pat[i]; i++)
-               if (ISUPPER(CHARMASK(pat[i])))
+               if (isupper((unsigned char)pat[i]))
                        xcase = 1;
        for (;;) {
                if (cbo == llength(clp)) {
@@ -755,7 +755,7 @@ backsrch(void)
        cbo = curwp->w_doto;
        nline = curwp->w_dotline;
        for (i = 0; pat[i]; i++)
-               if (ISUPPER(CHARMASK(pat[i])))
+               if (isupper((unsigned char)pat[i]))
                        xcase = 1;
        for (;;) {
                if (cbo == 0) {
@@ -809,16 +809,16 @@ fail:             ;
 static int
 eq(int bc, int pc, int xcase)
 {
-       bc = CHARMASK(bc);
-       pc = CHARMASK(pc);
+       bc = (unsigned char)bc;
+       pc = (unsigned char)pc;
        if (bc == pc)
                return (TRUE);
        if (xcase)
                return (FALSE);
-       if (ISUPPER(bc))
-               return (TOLOWER(bc) == pc);
-       if (ISUPPER(pc))
-               return (bc == TOLOWER(pc));
+       if (isupper(bc))
+               return (tolower(bc) == pc);
+       if (isupper(pc))
+               return (bc == tolower(pc));
        return (FALSE);
 }
 
Index: tags.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/tags.c,v
retrieving revision 1.16
diff -u -p -u -p -r1.16 tags.c
--- tags.c      6 Aug 2017 04:39:45 -0000       1.16
+++ tags.c      30 May 2018 13:34:01 -0000
@@ -49,7 +49,7 @@ struct ctag {
        char *pat;
 };
 RB_HEAD(tagtree, ctag) tags = RB_INITIALIZER(&tags);
-RB_GENERATE(tagtree, ctag, entry, ctagcmp);
+RB_GENERATE(tagtree, ctag, entry, ctagcmp)
 
 struct tagpos {
        SLIST_ENTRY(tagpos) entry;
@@ -433,10 +433,15 @@ searchpat(char *s_pat)
 int
 atbow(void)
 {
+       size_t len = llength(curwp->w_dotp);
+       struct line *clp = curwp->w_dotp;
+       size_t o = curwp->w_doto;
+
        if (curwp->w_doto == 0)
                return (TRUE);
-       if (ISWORD(curwp->w_dotp->l_text[curwp->w_doto]) &&
-           !ISWORD(curwp->w_dotp->l_text[curwp->w_doto - 1]))
+
+       if (byteinword(clp->l_text, o, len - o) &&
+           !byteinword(clp->l_text, o - 1, 1))
                return (TRUE);
        return (FALSE);
 }
@@ -449,15 +454,13 @@ curtoken(int f, int n, char *token)
 {
        struct line *odotp;
        int odoto, tdoto, odotline, size, r;
-       char c;
        
        /* Underscore character is to be treated as "inword" while
         * processing tokens unlike mg's default word traversal. Save
         * and restore it's cinfo value so that tag matching works for
         * identifier with underscore.
         */
-       c = cinfo['_'];
-       cinfo['_'] = _MG_W;
+       treatunderscoreasword(1);
        
        odotp = curwp->w_dotp;
        odoto = curwp->w_doto;
@@ -491,10 +494,10 @@ curtoken(int f, int n, char *token)
        r = TRUE;
        
 cleanup:
-       cinfo['_'] = c;
        curwp->w_dotp = odotp;
        curwp->w_doto = odoto;
        curwp->w_dotline = odotline;
+       treatunderscoreasword(0);
        return (r);
 }
 
Index: ttykbd.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/ttykbd.c,v
retrieving revision 1.19
diff -u -p -u -p -r1.19 ttykbd.c
--- ttykbd.c    17 Dec 2017 14:37:57 -0000      1.19
+++ ttykbd.c    30 May 2018 13:34:01 -0000
@@ -18,12 +18,6 @@
 #include "kbd.h"
 
 /*
- * Get keyboard character.  Very simple if you use keymaps and keys files.
- */
-
-char   *keystrings[] = {NULL};
-
-/*
  * Turn on function keys using keypad_xmit, then load a keys file, if
  * available.  The keys file is located in the same manner as the startup
  * file is, depending on what startupfile() does on your system.
Index: undo.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/undo.c,v
retrieving revision 1.58
diff -u -p -u -p -r1.58 undo.c
--- undo.c      5 Sep 2016 08:10:58 -0000       1.58
+++ undo.c      30 May 2018 13:34:01 -0000
@@ -562,7 +562,7 @@ undo(int f, int n)
                         */
                        switch (ptr->type) {
                        case INSERT:
-                               ldelete(ptr->region.r_size, KNONE);
+                               ldeletebyte(ptr->region.r_size, KNONE);
                                break;
                        case DELETE:
                                lp = curwp->w_dotp;
Index: util.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/util.c,v
retrieving revision 1.38
diff -u -p -u -p -r1.38 util.c
--- util.c      18 Nov 2015 18:21:06 -0000      1.38
+++ util.c      30 May 2018 13:34:01 -0000
@@ -11,8 +11,12 @@
 
 #include <sys/queue.h>
 #include <ctype.h>
+#include <limits.h>
 #include <signal.h>
 #include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <wchar.h>
 
 #include "def.h"
 
@@ -33,6 +37,9 @@ showcpos(int f, int n)
        int      nline, row;
        int      cline, cbyte;          /* Current line/char/byte */
        int      ratio;
+       char     ismb = 0;
+       wchar_t  wc = 0;
+       char     mbc[MB_LEN_MAX + 1];
 
        /* collect the data */
        clp = bfirstlp(curbp);
@@ -69,8 +76,43 @@ showcpos(int f, int n)
                clp = lforw(clp);
        }
        ratio = nchar ? (100L * cchar) / nchar : 100;
-       ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d  col=%d",
-           cbyte, cbyte, cchar, ratio, cline, row, getcolpos(curwp));
+
+       if (!(curbp->b_flag & BFSHOWRAW)) {
+               struct line *dotp = curwp->w_dotp;
+               mbstate_t mbs = { 0 };
+               size_t consumed = 0;
+               size_t offset = 0;
+               while (cbyte != '\n' && offset <= curwp->w_doto) {
+                       size_t s = curwp->w_doto - offset;
+                       int c = lgetc(dotp, s);
+                       if (isprint(c) || (iscntrl(c) != FALSE)) {
+                               break;
+                       }
+                       consumed = mbrtowc(&wc, &dotp->l_text[s],
+                                          llength(dotp) - s, &mbs);
+                       if (consumed < (size_t) -2) {
+                               ismb = (offset < consumed);
+                               /* copy only the bytes mbrtowc consumed */
+                               memcpy(mbc, &dotp->l_text[s], consumed);
+                               mbc[consumed] = '\0';
+                               break;
+                       } else {
+                               memset(&mbs, 0, sizeof mbs);
+                       }
+                       offset++;
+               }
+       }
+
+       if (ismb) {
+               ewprintf("Char: %s (codepoint 0x%lx) Byte: %c (0%o)  "
+                        "point=%ld(%d%%)  line=%d  row=%d  col=%d", mbc,
+                        (long) wc, cbyte, cbyte, cchar, ratio, cline, row,
+                        getcolpos(curwp));
+       } else {
+               ewprintf("Char: %c (0%o)  point=%ld(%d%%)  line=%d  row=%d"
+                        "  col=%d", cbyte, cbyte, cchar, ratio, cline, row,
+                        getcolpos(curwp));
+       }
        return (TRUE);
 }
 
@@ -92,10 +134,26 @@ getcolpos(struct mgwin *wp)
                        ) {
                        col |= 0x07;
                        col++;
-               } else if (ISCTRL(c) != FALSE)
+               } else if (iscntrl(c) != FALSE)
                        col += 2;
                else if (isprint(c)) {
                        col++;
+               } else if (!(wp->w_bufp->b_flag & BFSHOWRAW)) {
+                       mbstate_t mbs = { 0 };
+                       wchar_t wc = 0;
+                       size_t consumed = mbrtowc(&wc, &wp->w_dotp->l_text[i],
+                                                 llength(wp->w_dotp) - i,
+                                                 &mbs);
+                       int width = -1;
+                       if (consumed < (size_t) -2) {
+                               width = wcwidth(wc);
+                       }
+                       if (width >= 0) {
+                               col += width;
+                               i += (consumed - 1);
+                       } else {
+                               col += snprintf(tmp, sizeof(tmp), "\\%o", c);
+                       }
                } else {
                        col += snprintf(tmp, sizeof(tmp), "\\%o", c);
                }
@@ -234,7 +292,7 @@ deblank(int f, int n)
                return (TRUE);
        curwp->w_dotp = lforw(lp1);
        curwp->w_doto = 0;
-       return (ldelete((RSIZE)nld, KNONE));
+       return (ldeletechar((RSIZE)nld, KNONE));
 }
 
 /*
@@ -275,7 +333,7 @@ delwhite(int f, int n)
 
        if (s == TRUE)
                (void)forwchar(FFRAND, 1);
-       (void)ldelete((RSIZE)(col - curwp->w_doto), KNONE);
+       (void)ldeletebyte((RSIZE)(col - curwp->w_doto), KNONE);
        return (TRUE);
 }
 
@@ -427,7 +485,7 @@ forwdel(int f, int n)
                thisflag |= CFKILL;
        }
 
-       return (ldelete((RSIZE) n, (f & FFARG) ? KFORW : KNONE));
+       return (ldeletechar((RSIZE) n, (f & FFARG) ? KFORW : KNONE));
 }
 
 /*
@@ -451,7 +509,7 @@ backdel(int f, int n)
                thisflag |= CFKILL;
        }
        if ((s = backchar(f | FFRAND, n)) == TRUE)
-               s = ldelete((RSIZE)n, (f & FFARG) ? KFORW : KNONE);
+               s = ldeletechar((RSIZE)n, (f & FFARG) ? KFORW : KNONE);
 
        return (s);
 }
Index: word.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/word.c,v
retrieving revision 1.19
diff -u -p -u -p -r1.19 word.c
--- word.c      30 Dec 2015 20:51:51 -0000      1.19
+++ word.c      30 May 2018 13:34:01 -0000
@@ -11,9 +11,12 @@
 #include <sys/queue.h>
 #include <signal.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <wchar.h>
+#include <wctype.h>
 
 #include "def.h"
 
@@ -231,8 +234,16 @@ grabword(char **word)
 int
 upperword(int f, int n)
 {
-       int     c, s;
-       RSIZE   size;
+       int              s;
+       RSIZE            size;
+       mbstate_t        mbs = { 0 };
+       wchar_t          wc = 0;
+       wchar_t          wcu = 0;
+       size_t           consumed = 0;
+       size_t           mbslen = 0;
+       struct line     *clp;
+       size_t           o;
+       char             t[MB_LEN_MAX + 1];
 
        if ((s = checkdirty(curbp)) != TRUE)
                return (s);
@@ -253,12 +264,28 @@ upperword(int f, int n)
                undo_add_change(curwp->w_dotp, curwp->w_doto, size);
 
                while (inword() != FALSE) {
-                       c = lgetc(curwp->w_dotp, curwp->w_doto);
-                       if (ISLOWER(c) != FALSE) {
-                               c = TOUPPER(c);
-                               lputc(curwp->w_dotp, curwp->w_doto, c);
-                               lchange(WFFULL);
+                       clp = curwp->w_dotp;
+                       o = curwp->w_doto;
+                       consumed = mbrtowc(&wc, &clp->l_text[o],
+                                          llength(clp) - o, &mbs);
+                       if (consumed < (size_t) -2) {
+                               wcu = towupper(wc);
+                               mbslen = wcrtomb(t, wcu, &mbs);
+                               if (mbslen == consumed) {
+                                       for (size_t k = 0; k < consumed; ++k) {
+                                               lputc(clp, o, t[k]);
+                                               o++;
+                                       }
+                               } else {
+                                       o++;
+                               }
+                       } else {
+                               mbs = (mbstate_t) { 0 };
+                               o++;
                        }
+
+                       lchange(WFFULL);
+
                        if (forwchar(FFRAND, 1) == FALSE)
                                return (TRUE);
                }
@@ -274,8 +301,16 @@ upperword(int f, int n)
 int
 lowerword(int f, int n)
 {
-       int     c, s;
-       RSIZE   size;
+       int              s;
+       RSIZE            size;
+       mbstate_t        mbs = { 0 };
+       wchar_t          wc = 0;
+       wchar_t          wcu = 0;
+       size_t           consumed = 0;
+       size_t           mbslen = 0;
+       struct line     *clp;
+       size_t           o;
+       char             t[MB_LEN_MAX + 1];
 
        if ((s = checkdirty(curbp)) != TRUE)
                return (s);
@@ -284,6 +319,7 @@ lowerword(int f, int n)
                ewprintf("Buffer is read-only");
                return (FALSE);
        }
+
        if (n < 0)
                return (FALSE);
        while (n--) {
@@ -295,12 +331,28 @@ lowerword(int f, int n)
                undo_add_change(curwp->w_dotp, curwp->w_doto, size);
 
                while (inword() != FALSE) {
-                       c = lgetc(curwp->w_dotp, curwp->w_doto);
-                       if (ISUPPER(c) != FALSE) {
-                               c = TOLOWER(c);
-                               lputc(curwp->w_dotp, curwp->w_doto, c);
-                               lchange(WFFULL);
+                       clp = curwp->w_dotp;
+                       o = curwp->w_doto;
+                       consumed = mbrtowc(&wc, &clp->l_text[o],
+                                          llength(clp) - o, &mbs);
+                       if (consumed < (size_t) -2) {
+                               wcu = towlower(wc);
+                               mbslen = wcrtomb(t, wcu, &mbs);
+                               if (mbslen == consumed) {
+                                       for (size_t k = 0; k < consumed; ++k) {
+                                               lputc(clp, o, t[k]);
+                                               o++;
+                                       }
+                               } else {
+                                       o++;
+                               }
+                       } else {
+                               mbs = (mbstate_t) { 0 };
+                               o++;
                        }
+
+                       lchange(WFFULL);
+
                        if (forwchar(FFRAND, 1) == FALSE)
                                return (TRUE);
                }
@@ -318,8 +370,17 @@ lowerword(int f, int n)
 int
 capword(int f, int n)
 {
-       int     c, s;
-       RSIZE   size;
+       int              s;
+       RSIZE            size;
+       mbstate_t        mbs = { 0 };
+       wchar_t          wc = 0;
+       wchar_t          wcu = 0;
+       wchar_t          wcl = 0;
+       size_t           consumed = 0;
+       size_t           mbslen = 0;
+       struct line     *clp;
+       size_t           o;
+       char             t[MB_LEN_MAX + 1];
 
        if ((s = checkdirty(curbp)) != TRUE)
                return (s);
@@ -340,21 +401,46 @@ capword(int f, int n)
                undo_add_change(curwp->w_dotp, curwp->w_doto, size);
 
                if (inword() != FALSE) {
-                       c = lgetc(curwp->w_dotp, curwp->w_doto);
-                       if (ISLOWER(c) != FALSE) {
-                               c = TOUPPER(c);
-                               lputc(curwp->w_dotp, curwp->w_doto, c);
-                               lchange(WFFULL);
+                       clp = curwp->w_dotp;
+                       o = curwp->w_doto;
+                       consumed = mbrtowc(&wc, &clp->l_text[o],
+                                          llength(clp) - o, &mbs);
+                       if (consumed < (size_t) -2) {
+                               wcu = towupper(wc);
+                               mbslen = wcrtomb(t, wcu, &mbs);
+                               if (mbslen == consumed) {
+                                       for (size_t k = 0; k < consumed; ++k) {
+                                               lputc(clp, o, t[k]);
+                                               o++;
+                                       }
+                                       lchange(WFFULL);
+                               }
+                       } else {
+                               mbs = (mbstate_t) { 0 };
                        }
+
                        if (forwchar(FFRAND, 1) == FALSE)
                                return (TRUE);
+
                        while (inword() != FALSE) {
-                               c = lgetc(curwp->w_dotp, curwp->w_doto);
-                               if (ISUPPER(c) != FALSE) {
-                                       c = TOLOWER(c);
-                                       lputc(curwp->w_dotp, curwp->w_doto, c);
-                                       lchange(WFFULL);
+                               clp = curwp->w_dotp;
+                               o = curwp->w_doto;
+                               consumed = mbrtowc(&wc, &clp->l_text[o],
+                                                  llength(clp) - o, &mbs);
+                               if (consumed < (size_t) -2) {
+                                       wcl = towlower(wc);
+                                       mbslen = wcrtomb(t, wcl, &mbs);
+                                       if (mbslen == consumed) {
+                                               for (size_t k = 0; k < consumed; ++k) {
+                                                       lputc(clp, o, t[k]);
+                                                       o++;
+                                               }
+                                               lchange(WFFULL);
+                                       }
+                               } else {
+                                       mbs = (mbstate_t) { 0 };
                                }
+
                                if (forwchar(FFRAND, 1) == FALSE)
                                        return (TRUE);
                        }
@@ -364,7 +450,7 @@ capword(int f, int n)
 }
 
 /*
- * Count characters in word, from current position
+ * Count bytes in word, from current position
  */
 RSIZE
 countfword()
@@ -372,16 +458,24 @@ countfword()
        RSIZE            size;
        struct line     *dotp;
        int              doto;
+       struct line     *last_dotp;
+       int              last_doto;
 
        dotp = curwp->w_dotp;
        doto = curwp->w_doto;
        size = 0;
 
        while (inword() != FALSE) {
+               last_dotp = curwp->w_dotp;
+               last_doto = curwp->w_doto;
                if (forwchar(FFRAND, 1) == FALSE)
                        /* hit the end of the buffer */
                        goto out;
-               ++size;
+               if (last_dotp == curwp->w_dotp) {
+                       size += (curwp->w_doto - last_doto);
+               } else {
+                       size++;
+               }
        }
 out:
        curwp->w_dotp = dotp;
@@ -438,7 +532,7 @@ delfword(int f, int n)
 out:
        curwp->w_dotp = dotp;
        curwp->w_doto = doto;
-       return (ldelete(size, KFORW));
+       return (ldeletechar(size, KFORW));
 }
 
 /*
@@ -498,7 +592,7 @@ delbword(int f, int n)
        /* undo assumed delete */
        --size;
 out:
-       return (ldelete(size, KBACK));
+       return (ldeletechar(size, KBACK));
 }
 
 /*
@@ -509,6 +603,7 @@ int
 inword(void)
 {
        /* can't use lgetc in ISWORD due to bug in OSK cpp */
-       return (curwp->w_doto != llength(curwp->w_dotp) &&
-           ISWORD(curwp->w_dotp->l_text[curwp->w_doto]));
+       size_t l = llength(curwp->w_dotp);
+       size_t k = curwp->w_doto;
+       return (k != l) && byteinword(curwp->w_dotp->l_text, k, l - k);
 }
Index: yank.c
===================================================================
RCS file: /cvs/src/usr.bin/mg/yank.c,v
retrieving revision 1.14
diff -u -p -u -p -r1.14 yank.c
--- yank.c      11 Dec 2015 20:21:23 -0000      1.14
+++ yank.c      30 May 2018 13:34:01 -0000
@@ -105,7 +105,7 @@ kremove(int n)
 {
        if (n < 0 || n + kstart >= kused)
                return (-1);
-       return (CHARMASK(kbufp[n + kstart]));
+       return ((unsigned char)(kbufp[n + kstart]));
 }
 
 /*
@@ -206,7 +206,7 @@ killline(int f, int n)
         */
 done:
        if (chunk)
-               return (ldelete(chunk, KFORW));
+               return (ldeletebyte(chunk, KFORW));
        return (TRUE);
 }
 

Reply via email to