When lines are printed with the 'l' flag, rather than printing
byte sequences for any non-ASCII characters, printable (per
isprintrune()) Unicode characters are displayed normally. The
usual \$, \t, \b and \\ escapes are still emitted; any other
non-printing character is replaced with a Unicode escape
(\uXXXX).

This may be controversial, as it contradicts POSIX. Rationale:

* Replacing printable non-ASCII runes with byte sequences
is pointless. There is no reason to escape printable
multibyte characters.

* The raw UTF-8 bytes of a non-printing character should not
be printed. It is far more useful to decode the sequence and
print the Unicode code point. '\u2028' is much easier to
understand than '\xe2\x80\xa8'--the reader is not forced to
decode the transformation format by hand.
---
 ed.c | 72 +++++++++++++++++++++++++++++++++++++-------------------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/ed.c b/ed.c
index 4b28848..e737d57 100644
--- a/ed.c
+++ b/ed.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "utf.h"
 #include "util.h"
 
 #define REGEXSIZE  100
@@ -653,48 +654,53 @@ doread(const char *fname)
 }
 
 static void
+lprint(char *s)
+{
+       int size;
+       Rune r;
+
+       while ((size = chartorune(&r, s)) > 0 && r != '\n') {
+               switch (r) {
+               case '$':
+                       fputs("\\$", stdout);
+                       break;
+               case '\t':
+                       fputs("\\t", stdout);
+                       break;
+               case '\b':
+                       fputs("\\b", stdout);
+                       break;
+               case '\\':
+                       fputs("\\\\", stdout);
+                       break;
+               default:
+                       if (!isprintrune(r))
+                               printf("\\u%04x", 0xFFFF & r);
+                       else
+                               fputrune(&r, stdout);
+               }
+               s += size;
+       }
+}
+
+static void
 doprint(void)
 {
-       int i, c;
-       char *s, *str;
+       int i;
+       char *s;
 
        if (line1 <= 0 || line2 > lastln)
                error("incorrect address");
        for (i = line1; i <= line2; ++i) {
                if (pflag == 'n')
                        printf("%d\t", i);
-               for (s = gettxt(i); (c = *s) != '\n'; ++s) {
-                       if (pflag != 'l')
-                               goto print_char;
-                       switch (c) {
-                       case '$':
-                               str = "\\$";
-                               goto print_str;
-                       case '\t':
-                               str = "\\t";
-                               goto print_str;
-                       case '\b':
-                               str = "\\b";
-                               goto print_str;
-                       case '\\':
-                               str = "\\\\";
-                               goto print_str;
-                       default:
-                               if (!isprint(c)) {
-                                       printf("\\x%x", 0xFF & c);
-                                       break;
-                               }
-                       print_char:
-                               putchar(c);
-                               break;
-                       print_str:
-                               fputs(str, stdout);
-                               break;
-                       }
+               s = gettxt(i);
+               if (pflag == 'l') {
+                       lprint(s);
+                       fputs("$\n", stdout);
+               } else {
+                       fputs(s, stdout);
                }
-               if (pflag == 'l')
-                       fputs("$", stdout);
-               putc('\n', stdout);
        }
        curln = i - 1;
 }
-- 
2.9.0


Reply via email to