[HACKERS] regression test crashes at tsearch

Hiroshi Inoue Tue, 17 Feb 2009 15:57:33 -0800

Hi,

I see a regression test failure in my mingw-vista port
when I invoke the following command:
  make check MULTIBYTE=euc_jp NO_LOCALE=yes
It causes a crash in the tsearch test.
The crash seems to occur when the server encoding isn't
UTF-8 and no locale is used (i.e. LC_CTYPE is C).
Attached is a patch to avoid the crash.


regards,
Hiroshi Inoue

Index: backend/utils/mb/mbutils.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/mb/mbutils.c,v
retrieving revision 1.78
diff -c -r1.78 mbutils.c
*** backend/utils/mb/mbutils.c  22 Jan 2009 10:09:48 -0000      1.78
--- backend/utils/mb/mbutils.c  17 Feb 2009 21:59:26 -0000
***************
*** 575,580 ****
--- 575,584 ----
  wchar2char(char *to, const wchar_t *from, size_t tolen)
  {
        size_t result;
+ #ifdef        WIN32
+       int     encoding = GetDatabaseEncoding();
+       bool    useWcstombs = !(encoding == PG_UTF8 || lc_ctype_is_c());
+ #endif        
        
        if (tolen == 0)
                return 0;
***************
*** 584,602 ****
         * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
         * and for some reason mbstowcs and wcstombs won't do this for us,
         * so we use MultiByteToWideChar().
         */
!       if (GetDatabaseEncoding() == PG_UTF8)
        {
!               result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
                                                                NULL, NULL);
                /* A zero return is failure */
!               if (result <= 0)
                        result = -1;
                else
                {
-                       Assert(result <= tolen);
                        /* Microsoft counts the zero terminator in the result */
!                       result--;
                }
        }
        else
--- 588,624 ----
         * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
         * and for some reason mbstowcs and wcstombs won't do this for us,
         * so we use MultiByteToWideChar().
+        * Also note wcstombs/mbstowcs is unavailable when LC_CTYPE is C.
         */
!       if (!useWcstombs)
        {
!               int     utf8len = tolen;
!               char *utf8str = to;
!               
!               if (encoding != PG_UTF8)
!               {
!                       utf8len = pg_encoding_max_length(PG_UTF8) * tolen;
!                       utf8str = palloc(utf8len + 1);
!               }
!               utf8len = WideCharToMultiByte(CP_UTF8, 0, from, -1, utf8str, 
utf8len,
                                                                NULL, NULL);
                /* A zero return is failure */
!               if (utf8len <= 0)
                        result = -1;
                else
                {
                        /* Microsoft counts the zero terminator in the result */
!                       result = utf8len - 1;
!                       if (encoding != PG_UTF8)
!                       {
!                               char *mbstr = 
pg_do_encoding_conversion((unsigned char *) utf8str, result, PG_UTF8, encoding);
!                               result = strlcpy(to, mbstr, tolen);
!                               if (utf8str != to)
!                                       pfree(utf8str);
!                               if (mbstr != utf8str)
!                                       pfree(mbstr);
!                       }
!                       Assert(result <= tolen);
                }
        }
        else
***************
*** 618,637 ****
  char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
  {
        size_t          result;
  
        if (tolen == 0)
                return 0;
  
  #ifdef WIN32
!       /* See WIN32 "Unicode" comment above */
!       if (GetDatabaseEncoding() == PG_UTF8)
        {
                /* Win32 API does not work for zero-length input */
!               if (fromlen == 0)
                        result = 0;
                else
                {
!                       result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, 
to, tolen - 1);
                        /* A zero return is failure */
                        if (result == 0)
                                result = -1;
--- 640,672 ----
  char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
  {
        size_t          result;
+ #ifdef        WIN32
+       int     encoding = GetDatabaseEncoding();
+       bool    useMbstowcs = !(encoding == PG_UTF8 || lc_ctype_is_c());
+ #endif        
  
        if (tolen == 0)
                return 0;
  
  #ifdef WIN32
!       if (!useMbstowcs)
        {
+               int     utf8len = fromlen;
+               unsigned char *utf8str = (unsigned char *) from;
+               
+               if (encoding != PG_UTF8)
+               {
+                       utf8str = pg_do_encoding_conversion(from, fromlen, 
encoding, PG_UTF8);
+                       if (utf8str != from)
+                               utf8len = strlen(utf8str);
+               }
+               /* See WIN32 "Unicode" comment above */
                /* Win32 API does not work for zero-length input */
!               if (utf8len == 0)
                        result = 0;
                else
                {
!                       result = MultiByteToWideChar(CP_UTF8, 0, utf8str, 
utf8len, to, tolen - 1);
                        /* A zero return is failure */
                        if (result == 0)
                                result = -1;
***************
*** 643,648 ****
--- 678,685 ----
                        /* Append trailing null wchar (MultiByteToWideChar() 
does not) */
                        to[result] = 0;
                }
+               if (utf8str != from)
+                       pfree(utf8str);
        }
        else
  #endif   /* WIN32 */

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] regression test crashes at tsearch

Reply via email to