Now calculates utf-8 rune width properly before trying to print on screen.
Works with test1.txt and test2.txt in the tests/files/utf8 folder with
0x0300-0x036F combining chars.

Uses mbtowc and wcwidth to calculate the width of a rune. Both should be
implemented by any C runtime that conforms to POSIX.1-2001. Different C
runtimes might have different levels of support for combining char
ranges etc...

I think there is no standard way to calculate utf-8 rune width without
converting it first to widechar. And I think conversion to widechar just
to calculate width is silly, since all write calls can be done with utf8
directly (on utf8 locales ofc), but in order to calculate the widths yourself
without a pointless conversion, one would need to write a variable-byte lookup
array for binary searching the weird ranges and make sure it works on
big-endian systems too...

By the way, running ./watch ./cat tests/files/utf8/japan.txt does not print
the text for some reason, but the other test data does... I was checking how
well the original crunch_str works and noticed it.

-Jarno
From fb744874ced9f883888f5cfbe9752eb1fecd01cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jarno=20M=C3=A4kip=C3=A4=C3=A4?= <jmaki...@gmail.com>
Date: Thu, 28 Mar 2019 08:32:36 +0200
Subject: [PATCH] vi.c improved utf-8 support

Now calculates utf-8 rune width properly before trying to print on screen.
Works with test1.txt and test2.txt in the tests/files/utf8 folder with
0x0300-0x036F combining chars.

Uses mbtowc and wcwidth to calculate the width of a rune. Both should be
implemented by any C runtime that conforms to POSIX.1-2001. Different C
runtimes might have different levels of support for combining char
ranges etc...

I think there is no standard way to calculate utf-8 rune width without
converting it first to widechar. And I think conversion to widechar just
to calculate width is silly, since all write calls can be done with utf8
directly (on utf8 locales ofc), but in order to calculate the widths yourself
without a pointless conversion, one would need to write a variable-byte lookup
array for binary searching the weird ranges and make sure it works on
big-endian systems too...

By the way, running ./watch ./cat tests/files/utf8/japan.txt does not print
the text for some reason, but the other test data does... I was checking how
well the original crunch_str works and noticed it.

-Jarno
---
 toys/pending/vi.c | 231 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 165 insertions(+), 66 deletions(-)

diff --git a/toys/pending/vi.c b/toys/pending/vi.c
index 6a1c2349..ab15bfe0 100644
--- a/toys/pending/vi.c
+++ b/toys/pending/vi.c
@@ -31,8 +31,6 @@ GLOBALS(
  *
  * TODO:
  * BUGS:  screen pos adjust does not cover "widelines"
- *        utf8 problems with some files. perhaps use lib utf8 functions instead
- *        append to EOL does not show input but works when ESC out
  *        
  *
  * REFACTOR:  use dllist functions where possible.
@@ -57,10 +55,13 @@ struct linestack_show {
 };
 
 static void draw_page();
+static int draw_str_until(int *drawn, char *str, int width, int bytes);
 static void draw_char(char c, int x, int y, int highlight);
 //utf8 support
+static int utf8_lnw(int* width, char* str, int bytes);
 static int utf8_dec(char key, char *utf8_scratch, int *sta_p) ;
 static int utf8_len(char *str);
+static int utf8_width(char *str, int bytes);
 static int draw_rune(char *c, int x, int y, int highlight);
 
 
@@ -644,79 +645,110 @@ static void draw_page()
   int cy_scr = 0;
   int cx_scr = 0;
   int utf_l = 0;
+
+  char* line = 0;
+  int bytes = 0;
+  int drawn = 0;
   struct linelist *scr_buf= scr_r;
   //clear screen
   tty_esc("2J");
   tty_esc("H");
 
-
   tty_jump(0, 0);
+
+  //draw lines until cursor row
   for (; y < TT.screen_height; ) {
-    if (scr_buf && scr_buf->line->str_data && scr_buf->line->str_len) {
-      int p = 0;
-      for (; p < scr_buf->line->str_len; y++) {
-        unsigned x = 0;
-        for (; x < TT.screen_width; x++) {
-          if (p < scr_buf->line->str_len) {
-            int hi = 0;
-            if (scr_buf == c_r && p == TT.cur_col) {
-              if (TT.vi_mode == 2) {
-                tty_jump(x, y);
-                
-                tty_esc("1m"); //bold
-                printf("%s", il->str_data);
-                x += il->str_len;
-                tty_esc("0m"); 
-              } 
-              cy_scr = y;
-              cx_scr = x;
-            }
-            utf_l = draw_rune(&scr_buf->line->str_data[p], x, y, hi);
-            if (!utf_l)
-              break;
-            p += utf_l;
-            if (utf_l > 2) x++;//traditional chinese is somehow 2 width in tty???
-          }
-          else {
-            if (scr_buf == c_r && p == TT.cur_col) {
-              if (TT.vi_mode == 2) {
-                tty_jump(x, y);
-                
-                tty_esc("1m"); //bold
-                printf("%s", il->str_data);
-                x += il->str_len;
-                tty_esc("0m"); 
-              } 
-              cy_scr = y;
-              cx_scr = x;
-            }
-            break; 
-          }
-        }
-        printf("\r\n"); 
+    if (line && bytes) {
+      draw_str_until(&drawn, line, TT.screen_width, bytes);
+      bytes = drawn ? (bytes-drawn) : 0;
+      line = bytes ? (line+drawn) : 0;
+      y++;
+      tty_jump(0, y);
+    } else if (scr_buf && scr_buf->line->str_data && scr_buf->line->str_len) {
+      if (scr_buf == c_r) 
+        break;
+      line = scr_buf->line->str_data;
+      bytes = scr_buf->line->str_len;
+      scr_buf = scr_buf->down;
+    } else {
+      y++;
+      tty_jump(0, y);
+      //printf(" \n");
+      if (scr_buf) scr_buf = scr_buf->down;
+    }
+
+  }
+  //draw cursor row until cursor
+  //this is to calculate cursor position on screen and possible insert
+  if (TT.cur_col) {
+    int x = 0;
+    line = scr_buf->line->str_data;
+    bytes = TT.cur_col;
+    for (; y < TT.screen_height; ) {
+      if (bytes) {
+        x = draw_str_until(&drawn, line, TT.screen_width, bytes);
+        bytes = drawn ? (bytes-drawn) : 0;
+        line = bytes ? (line+drawn) : 0;
       }
+      if (!bytes) break;
+      y++;
+      tty_jump(0, y);
     }
-    else {
-      if (scr_buf == c_r){
-              cy_scr = y;
-              cx_scr = 0;
-              if (TT.vi_mode == 2) {
-                tty_jump(0, y);
-                tty_esc("1m"); //bold
-                printf("%s", il->str_data);
-                cx_scr += il->str_len;
-                tty_esc("0m"); 
-              } else draw_char(' ', 0, y, 1);
+    if (TT.vi_mode == 2 && il->str_len) {
+      line = il->str_data;
+      bytes = il->str_len;
+      cx_scr = x;
+      cy_scr = y;
+      x = draw_str_until(&drawn, line, TT.screen_width-x, bytes);
+      bytes = drawn ? (bytes-drawn) : 0;
+      line = bytes ? (line+drawn) : 0;
+      cx_scr += x;
+      for (; y < TT.screen_height; ) {
+        if (bytes) {
+          x = draw_str_until(&drawn, line, TT.screen_width, bytes);
+          bytes = drawn ? (bytes-drawn) : 0;
+          line = bytes ? (line+drawn) : 0;
+          cx_scr = x;
+        }
+        if (!bytes) break;
+        y++;
+        cy_scr = y;
+        tty_jump(0, y);
       }
-      y++; 
+    } else {
+      cy_scr = y;
+      cx_scr = x;
     }
-    printf("\n");
-    if (scr_buf->down)
-      scr_buf=scr_buf->down;
-    else break;
+    line = scr_buf->line->str_data+TT.cur_col;
+    bytes = scr_buf->line->str_len-TT.cur_col;
+    scr_buf = scr_buf->down;
+    x = draw_str_until(&drawn,line, TT.screen_width-x, bytes);
+    bytes = drawn ? (bytes-drawn) : 0;
+    line = bytes ? (line+drawn) : 0;
+    y++; 
+    tty_jump(0, y);
+  } else {
+    cy_scr = y;
+    cx_scr = 0;
   }
-  for (; y < TT.screen_height; y++) {
-    printf("\n");
+//draw until end
+  for (; y < TT.screen_height; ) {
+    if (line && bytes) {
+      draw_str_until(&drawn, line, TT.screen_width, bytes);
+      bytes = drawn ? (bytes-drawn) : 0;
+      line = bytes ? (line+drawn) : 0;
+      y++; 
+      tty_jump(0, y);
+    } else if (scr_buf && scr_buf->line->str_data && scr_buf->line->str_len) {
+      line = scr_buf->line->str_data;
+      bytes = scr_buf->line->str_len;
+      scr_buf = scr_buf->down;
+    } else {
+      y++;
+      tty_jump(0, y);
+      if (scr_buf) scr_buf = scr_buf->down;
+    }
+
   }
 
   tty_jump(0, TT.screen_height);
@@ -754,7 +786,9 @@ static void draw_page()
     tty_esc("1m");
     tty_jump(0, TT.screen_height+1);
     printf("%s", il->str_data);
+    tty_esc("0m");
   } else tty_jump(cx_scr, cy_scr);
+  
   xflush();
 
 }
@@ -839,9 +873,9 @@ static void adjust_screen_buffer()
 //naive implementation with branches
 //there is better branchless lookup table versions out there
 //1 0xxxxxxx
-//2 110xxxxx  10xxxxxx 
-//3 1110xxxx	10xxxxxx	10xxxxxx
-//4	11110xxx	10xxxxxx	10xxxxxx	10xxxxxx
+//2 110xxxxx  10xxxxxx
+//3 1110xxxx  10xxxxxx  10xxxxxx
+//4 11110xxx  10xxxxxx  10xxxxxx  10xxxxxx
 static int utf8_len(char *str)
 {
   int len = 0;
@@ -860,6 +894,43 @@ static int utf8_len(char *str)
   return len;
 }
 
+//get utf8 length and width at same time
+static int utf8_lnw(int* width, char* str, int bytes)
+{
+  wchar_t wc;
+  int length = 1;
+  *width = 1;
+//  if (str < 0x7F) return length;
+  length = mbtowc(&wc, str, bytes);
+  switch (length) {
+  case -1:
+    mbtowc(0,0,4);
+  case 0:
+    *width = 0;
+    length = 0;
+    break;
+  default:
+  *width = wcwidth(wc);
+  }
+  return length;
+}
+
+//try to estimate width of next "glyph" in terminal buffer
+//combining chars 0x300-0x36F shall be zero width
+static int utf8_width(char *str, int bytes)
+{
+  wchar_t wc;
+  switch (mbtowc(&wc, str, bytes)) {
+  case -1:
+    mbtowc(0,0,4);
+  case 0:
+    return -1;
+  default:
+  return wcwidth(wc);
+  }
+  return 0;
+}
+
 static int utf8_dec(char key, char *utf8_scratch, int *sta_p) 
 {
   int len = 0;
@@ -882,6 +953,34 @@ static int utf8_dec(char key, char *utf8_scratch, int *sta_p)
   return 0;
 }
 
+static int draw_str_until(int *drawn, char *str, int width, int bytes)
+{
+  int rune_width = 0;
+  int rune_bytes = 0;
+  int max_bytes = bytes;
+  int max_width = width;
+  char* end = str;
+  for (;width && bytes;) {
+    rune_bytes = utf8_lnw(&rune_width, end, 4);
+    if (!rune_bytes) break;
+    if (width - rune_width < 0) goto write_bytes; 
+    width -= rune_width;
+    bytes -= rune_bytes;
+    end += rune_bytes;
+  }
+  for (;bytes;) {
+    rune_bytes = utf8_lnw(&rune_width, end, 4);
+    if (!rune_bytes) break;
+    if (rune_width) break;
+    bytes -= rune_bytes;
+    end += rune_bytes;
+  }
+write_bytes:
+  fwrite(str, max_bytes-bytes, 1, stdout);
+  *drawn = max_bytes-bytes;
+  return max_width-width;
+}
+
 static void cur_left()
 {
   if (!TT.cur_col) return;
-- 
2.19.1

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to