Hi,

The attached patch fixes ed's handling of non-UTF-8 multi-byte encodings.

Problem 1:
0. Invoke a terminal emulator on zh_CN.GB18030.
1. Create a file whose name is a multi-byte character containing the byte 0x5c ('\').
% echo "hello" > `/usr/bin/printf "\x81\x5c"`
2. Invoke ed with the created file name.
% env LC_MESSAGES=C ed `/usr/bin/printf "\x81\x5c"`
multi-byte: No such file or directory

ed then prints "No such file or directory".
The byte sequence 0x81 0x5c is a single multi-byte character in the GB18030 encoding.
My patch fixes strip_escapes() so that it no longer mistakes the second byte of a multi-byte character for a single-byte '\'.

Problem 2:
1. Create a file whose content includes a multi-byte character containing the byte 0x5b ('[').
% /usr/bin/printf "abc\x81\x5bdef" > hello
% cat hello
abc乕def
2. Invoke ed with the created file and search for the character with /.../.
% ed hello
/ab/
abc乕def
/乕d/
?

ed prints '?'.
The byte sequence 0x81 0x5b is a single multi-byte character in the GB18030 encoding.
My patch fixes extract_pattern() so that it no longer mistakes the second byte of a multi-byte character for a single-byte '['.

I also fixed read_file()/write_file() so that only a single-byte '!' at the start of the name is treated as the shell-command prefix.

Thanks,
fujiwara
--- ed/configure.in.orig        2009-07-21 13:25:22.000000000 +0900
+++ ed/configure.in     2009-07-21 13:25:39.000000000 +0900
@@ -10,7 +10,7 @@ AC_ISC_POSIX
 AC_PROG_CC
 AC_C_CONST
 AC_HEADER_STDC
-AC_CHECK_HEADERS(limits.h memory.h string.h unistd.h locale.h)
+AC_CHECK_HEADERS(limits.h memory.h string.h unistd.h locale.h wchar.h)
 AC_CHECK_FUNCS(setbuffer sigsetjmp sigaction strerror)
 AC_FUNC_VPRINTF
 AC_FUNC_ALLOCA
--- ed/ed.h.orig        2009-07-17 19:06:29.000000000 +0900
+++ ed/ed.h     2009-07-21 13:29:05.000000000 +0900
@@ -69,6 +69,10 @@ long strtol ();
 #define memcmp(s1, s2, n) bcmp ((s1), (s2), (n))
 #endif /* not STDC_HEADERS and not HAVE_STRING_H */
 
+#if HAVE_WCHAR_H
+#include <wchar.h>
+#endif
+
 /* #include <sys/types.h> */
 
 #include "pathmax.h"
--- ed/io.c.orig        2002-05-18 04:54:26.000000000 +0900
+++ ed/io.c     2009-07-21 13:42:55.000000000 +0900
@@ -35,9 +35,20 @@ read_file (fn, n)
 {
   FILE *fp;
   long size;
+  int is_bang = 0;
+#ifdef HAVE_WCHAR_H
+  mbstate_t mbs;
+#endif
 
 
-  fp = (*fn == '!') ? popen (fn + 1, "r") : fopen (strip_escapes (fn), "r");
+#ifdef HAVE_WCHAR_H
+  memset (&mbs, 0, sizeof (mbs));
+  is_bang = ((mbrlen (fn, MB_CUR_MAX, &mbs) == 1) &&
+            (*fn == '!')) ? 1 : 0;
+#else
+  is_bang = (*fn == '!') ? 1 : 0;
+#endif
+  fp = is_bang ? popen (fn + 1, "r") : fopen (strip_escapes (fn), "r");
   if (fp == NULL)
     {
       fprintf (stderr, "%s: %s\n", fn, strerror (errno));
@@ -46,7 +57,7 @@ read_file (fn, n)
     }
   else if ((size = read_stream (fp, n)) < 0)
     return ERR;
-  else if (((*fn == '!') ? pclose (fp) : fclose (fp)) < 0)
+  else if ((is_bang ? pclose (fp) : fclose (fp)) < 0)
     {
       fprintf (stderr, "%s: %s\n", fn, strerror (errno));
       sprintf (errmsg, "Cannot close input file");
@@ -156,8 +167,19 @@ write_file (fn, mode, n, m)
 {
   FILE *fp;
   long size;
-
-  fp = (*fn == '!') ? popen (fn + 1, "w") : fopen (strip_escapes (fn), mode);
+  int is_bang = 0;
+#ifdef HAVE_WCHAR_H
+  mbstate_t mbs;
+#endif
+
+#ifdef HAVE_WCHAR_H
+  memset (&mbs, 0, sizeof (mbs));
+  is_bang = ((mbrlen (fn, MB_CUR_MAX, &mbs) == 1) &&
+             (*fn == '!')) ? 1 : 0;
+#else
+  is_bang = (*fn == '!') ? 1 : 0;
+#endif
+  fp = is_bang ? popen (fn + 1, "w") : fopen (strip_escapes (fn), mode);
   if (fp == NULL)
     {
       fprintf (stderr, "%s: %s\n", fn, strerror (errno));
@@ -166,7 +188,7 @@ write_file (fn, mode, n, m)
     }
   else if ((size = write_stream (fp, n, m)) < 0)
     return ERR;
-  else if (((*fn == '!') ? pclose (fp) : fclose (fp)) < 0)
+  else if ((is_bang ? pclose (fp) : fclose (fp)) < 0)
     {
       fprintf (stderr, "%s: %s\n", fn, strerror (errno));
       sprintf (errmsg, "Cannot close output file");
--- ed/main.c.orig      2009-07-21 13:50:46.000000000 +0900
+++ ed/main.c   2009-07-21 14:00:20.000000000 +0900
@@ -1643,13 +1643,33 @@ strip_escapes (s)
 {
   static char *file = NULL;
   static int filesz = 0;
+#if HAVE_WCHAR_H
+  int char_len;
+  mbstate_t mbs;
+#endif
 
   int i = 0;
 
   REALLOC (file, filesz, PATH_MAX + 1, NULL);
   /* assert: no trailing escape */
-  while (file[i++] = (*s == '\\') ? *++s : *s)
-    s++;
+  while( *s )
+    {
+#if HAVE_WCHAR_H
+      memset (&mbs, 0, sizeof (mbs));
+      char_len = mbrlen (s, MB_CUR_MAX, &mbs);
+      if (char_len > 1)
+        {
+          while (char_len > 0)
+            {
+              file[i++] = *s++;
+              char_len--;
+            }
+          continue;
+        }
+#endif
+      file[i++] = ( (*s == '\\' ) ? *++s : *s );
+      s++;
+    }
   return file;
 }
 
--- ed/re.c.orig        2002-05-18 04:54:26.000000000 +0900
+++ ed/re.c     2009-07-21 13:49:40.000000000 +0900
@@ -83,8 +83,21 @@ extract_pattern (delimiter)
 
   char *nd;
   int len;
+#if HAVE_WCHAR_H
+  mbstate_t mbs;
+#endif
 
   for (nd = ibufp; *nd != delimiter && *nd != '\n'; nd++)
+    {
+#if HAVE_WCHAR_H
+    memset (&mbs, 0, sizeof (mbs));
+    len = mbrlen (nd, MB_CUR_MAX, &mbs);
+    if (len > 1)
+      {
+        nd += (len - 1);
+        continue;
+      }
+#endif
     switch (*nd)
       {
       default:
@@ -104,6 +117,7 @@ extract_pattern (delimiter)
          }
        break;
       }
+    }
   len = nd - ibufp;
   REALLOC (lhbuf, lhbufsz, len + 1, NULL);
   memcpy (lhbuf, ibufp, len);
_______________________________________________
bug-ed mailing list
bug-ed@gnu.org
http://lists.gnu.org/mailman/listinfo/bug-ed

Reply via email to