On Jan 26 22:09, Charles Wilson wrote: > On 1/24/2011 10:09 PM, Charles Wilson wrote: > > Now, since there has not yet been an updated upstream release of > > libiconv, my first step would be to simply rebuild our existing > > libiconv-1.13.1 on a platform with current cygwin (1.7.7-1), and try the > > test case again. > > Rebuilt libiconv against 20110117 snapshot. Built test case. Still see > erroneous behavior: > > iconv: 138 <Invalid or incomplete multibyte or wide character> > in = <Liian pitkä sana>, inbuf = <ä sana>, inbytesleft = 7, outbytesleft > = 492 > iconv: 138 <Invalid or incomplete multibyte or wide character> > in = <Liian pitkä sana>, inbuf = <ä sana>, inbytesleft = 7, outbytesleft > = 492 > iconv: 138 <Invalid or incomplete multibyte or wide character> > in = <Liian pitkä sana>, inbuf = <ä sana>, inbytesleft = 7, outbytesleft > = 492 > in = <Liian pitkä sana>, inbuf = <>, inbytesleft = 0, outbytesleft = 480
I got it working. The major reason was that the conversion to wchar_t was broken due to the #if expressions in lib/iconv.c and lib/iconv_open1.h: #if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) This should be #if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if you use the latest snapshot I *just* uploaded. It's the second one today since the definition in /usr/include/features.h wasn't picked up at all. So I had to change the newlib headers similar to what Linux does to get it working. With 1.7.7 you would have to define #if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ Other than that, here's the full patchset which I applied to let libiconv work more POSIXy on Cygwin. I tested especially that the Linux code works fine on Cygwin as well. Use the patch at your own leisure. If you have any questions, feel free to ask. --- libiconv-1.13.1.orig/lib/relocatable.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/lib/relocatable.c 2011-01-27 11:37:16.748956079 +0100 @@ -43,7 +43,7 @@ # include "xalloc.h" #endif -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -70,8 +70,8 @@ ISSLASH(C) tests whether C is a directory separator character. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. */ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Win32, Cygwin, OS/2, DOS */ +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ + /* Win32, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \ @@ -281,7 +281,7 @@ compute_curr_prefix (const char *orig_in /* Full pathname of shared library, or NULL. 
*/ static char *shared_library_fullname; -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ /* Determine the full pathname of the shared library when it is loaded. */ @@ -303,37 +303,20 @@ DllMain (HINSTANCE module_handle, DWORD /* Shouldn't happen. */ return FALSE; - { -#if defined __CYGWIN__ - /* On Cygwin, we need to convert paths coming from Win32 system calls - to the Unix-like slashified notation. */ - static char location_as_posix_path[2 * MAX_PATH]; - /* There's no error return defined for cygwin_conv_to_posix_path. - See cygwin-api/func-cygwin-conv-to-posix-path.html. - Does it overflow the buffer of expected size MAX_PATH or does it - truncate the path? I don't know. Let's catch both. */ - cygwin_conv_to_posix_path (location, location_as_posix_path); - location_as_posix_path[MAX_PATH - 1] = '\0'; - if (strlen (location_as_posix_path) >= MAX_PATH - 1) - /* A sign of buffer overflow or path truncation. */ - return FALSE; - shared_library_fullname = strdup (location_as_posix_path); -#else - shared_library_fullname = strdup (location); -#endif - } + shared_library_fullname = strdup (location); } return TRUE; } -#else /* Unix except Cygwin */ +#else /* Unix */ static void find_shared_library_fullname () { -#if defined __linux__ && __GLIBC__ >= 2 - /* Linux has /proc/self/maps. glibc 2 has the getline() function. */ +#if (defined __linux__ && __GLIBC__ >= 2) || defined __CYGWIN__ + /* Linux has /proc/self/maps. glibc 2 has the getline() function. + Cygwin as well. */ FILE *fp; /* Open the current process' maps file. It describes one VMA per line. */ @@ -378,7 +361,7 @@ find_shared_library_fullname () #endif } -#endif /* (WIN32 or Cygwin) / (Unix except Cygwin) */ +#endif /* WIN32 / Unix */ /* Return the full pathname of the current shared library. Return NULL if unknown. 
@@ -386,7 +369,7 @@ find_shared_library_fullname () static char * get_shared_library_fullname () { -#if !(defined _WIN32 || defined __WIN32__ || defined __CYGWIN__) +#if !(defined _WIN32 || defined __WIN32__) static bool tried_find_shared_library_fullname; if (!tried_find_shared_library_fullname) { --- libiconv-1.13.1.orig/lib/iconv.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/lib/iconv.c 2011-01-27 12:46:21.544296281 +0100 @@ -550,7 +550,7 @@ const char * iconv_canonicalize (const c if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. */ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { index = ei_ucs4internal; break; --- libiconv-1.13.1.orig/lib/iconv_open1.h 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/lib/iconv_open1.h 2011-01-27 12:47:03.119371056 +0100 @@ -98,7 +98,7 @@ if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. */ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { to_index = ei_ucs4internal; break; @@ -174,7 +174,7 @@ if (ap->encoding_index == ei_local_wchar_t) { /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode. This is also the case on native Woe32 systems. 
*/ -#if __STDC_ISO_10646__ || ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) +#if __STDC_ISO_10646__ || defined _WIN32 || defined __WIN32__ if (sizeof(wchar_t) == 4) { from_index = ei_ucs4internal; break; --- libiconv-1.13.1.orig/libcharset/lib/localcharset.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/libcharset/lib/localcharset.c 2011-01-27 11:53:33.201852883 +0100 @@ -52,10 +52,6 @@ # include <locale.h> # endif # endif -# ifdef __CYGWIN__ -# define WIN32_LEAN_AND_MEAN -# include <windows.h> -# endif #elif defined WIN32_NATIVE # define WIN32_LEAN_AND_MEAN # include <windows.h> @@ -76,7 +72,7 @@ # include "configmake.h" #endif -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ /* Win32, Cygwin, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') #endif @@ -117,7 +113,7 @@ get_charset_aliases (void) cp = charset_aliases; if (cp == NULL) { -#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__) +#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE) FILE *fp; const char *dir; const char *base = "charset.alias"; @@ -276,7 +272,7 @@ get_charset_aliases (void) "DECKOREAN" "\0" "EUC-KR" "\0"; # endif -# if defined WIN32_NATIVE || defined __CYGWIN__ +# if defined WIN32_NATIVE /* To avoid the troubles of installing a separate file in the same directory as the DLL and of retrieving the DLL's directory at runtime, simply inline the aliases here. */ @@ -332,55 +328,14 @@ locale_charset (void) # if HAVE_LANGINFO_CODESET - /* Most systems support nl_langinfo (CODESET) nowadays. */ - codeset = nl_langinfo (CODESET); - -# ifdef __CYGWIN__ - /* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always - returns "US-ASCII". As long as this is not fixed, return the suffix - of the locale name from the environment variables (if present) or - the codepage as a number. 
*/ - if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0) - { - const char *locale; - static char buf[2 + 10 + 1]; + /* Most systems support nl_langinfo (CODESET) nowadays. + + POSIX allows that the returned pointer may point to a static area that + may be overwritten by subsequent calls to setlocale or nl_langinfo. */ + static char codeset_buf[64]; - locale = getenv ("LC_ALL"); - if (locale == NULL || locale[0] == '\0') - { - locale = getenv ("LC_CTYPE"); - if (locale == NULL || locale[0] == '\0') - locale = getenv ("LANG"); - } - if (locale != NULL && locale[0] != '\0') - { - /* If the locale name contains an encoding after the dot, return - it. */ - const char *dot = strchr (locale, '.'); - - if (dot != NULL) - { - const char *modifier; - - dot++; - /* Look for the possible @... trailer and remove it, if any. */ - modifier = strchr (dot, '@'); - if (modifier == NULL) - return dot; - if (modifier - dot < sizeof (buf)) - { - memcpy (buf, dot, modifier - dot); - buf [modifier - dot] = '\0'; - return buf; - } - } - } - - /* Woe32 has a function returning the locale's codepage as a number. */ - sprintf (buf, "CP%u", GetACP ()); - codeset = buf; - } -# endif + codeset_buf[0] = '\0'; + codeset = strncat (codeset_buf, nl_langinfo (CODESET), sizeof (codeset_buf)); # else --- libiconv-1.13.1.orig/libcharset/lib/relocatable.c 2009-06-21 13:17:33.000000000 +0200 +++ libiconv-1.13.1/libcharset/lib/relocatable.c 2011-01-27 11:37:43.626054538 +0100 @@ -43,7 +43,7 @@ # include "xalloc.h" #endif -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -70,8 +70,8 @@ ISSLASH(C) tests whether C is a directory separator character. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. 
*/ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Win32, Cygwin, OS/2, DOS */ +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ + /* Win32, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \ @@ -290,7 +290,7 @@ compute_curr_prefix (const char *orig_in /* Full pathname of shared library, or NULL. */ static char *shared_library_fullname; -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ /* Determine the full pathname of the shared library when it is loaded. */ @@ -313,35 +313,19 @@ DllMain (HINSTANCE module_handle, DWORD return FALSE; { -#if defined __CYGWIN__ - /* On Cygwin, we need to convert paths coming from Win32 system calls - to the Unix-like slashified notation. */ - static char location_as_posix_path[2 * MAX_PATH]; - /* There's no error return defined for cygwin_conv_to_posix_path. - See cygwin-api/func-cygwin-conv-to-posix-path.html. - Does it overflow the buffer of expected size MAX_PATH or does it - truncate the path? I don't know. Let's catch both. */ - cygwin_conv_to_posix_path (location, location_as_posix_path); - location_as_posix_path[MAX_PATH - 1] = '\0'; - if (strlen (location_as_posix_path) >= MAX_PATH - 1) - /* A sign of buffer overflow or path truncation. */ - return FALSE; - shared_library_fullname = strdup (location_as_posix_path); -#else shared_library_fullname = strdup (location); -#endif } } return TRUE; } -#else /* Unix except Cygwin */ +#else /* Unix */ static void find_shared_library_fullname () { -#if defined __linux__ && __GLIBC__ >= 2 +#if (defined __linux__ && __GLIBC__ >= 2) || defined __CYGWIN__ /* Linux has /proc/self/maps. glibc 2 has the getline() function. 
*/ FILE *fp; @@ -387,7 +371,7 @@ find_shared_library_fullname () #endif } -#endif /* (WIN32 or Cygwin) / (Unix except Cygwin) */ +#endif /* WIN32 / Unix */ /* Return the full pathname of the current shared library. Return NULL if unknown. @@ -395,7 +379,7 @@ find_shared_library_fullname () static char * get_shared_library_fullname () { -#if !(defined _WIN32 || defined __WIN32__ || defined __CYGWIN__) +#if !(defined _WIN32 || defined __WIN32__) static bool tried_find_shared_library_fullname; if (!tried_find_shared_library_fullname) { --- libiconv-1.13.1.orig/srclib/errno.in.h 2009-06-21 13:31:08.000000000 +0200 +++ libiconv-1.13.1/srclib/errno.in.h 2011-01-27 11:39:52.666924514 +0100 @@ -30,7 +30,7 @@ /* On native Windows platforms, many macros are not defined. */ -# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +# if defined _WIN32 || defined __WIN32__ /* POSIX says that EAGAIN and EWOULDBLOCK may have the same value. */ # define EWOULDBLOCK EAGAIN --- libiconv-1.13.1.orig/srclib/progreloc.c 2009-06-21 13:31:08.000000000 +0200 +++ libiconv-1.13.1/srclib/progreloc.c 2011-01-27 11:39:01.062575765 +0100 @@ -38,7 +38,7 @@ # define WIN32_NATIVE #endif -#if defined WIN32_NATIVE || defined __CYGWIN__ +#if defined WIN32_NATIVE # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -64,8 +64,8 @@ ISSLASH(C) tests whether C is a directory separator character. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. 
*/ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Win32, Cygwin, OS/2, DOS */ +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ + /* Win32, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \ @@ -90,7 +90,7 @@ #if ENABLE_RELOCATABLE -#ifdef __linux__ +#if defined __linux__ || defined __CYGWIN__ /* File descriptor of the executable. (Only used to verify that we find the correct executable.) */ static int executable_fd = -1; @@ -100,12 +100,12 @@ static int executable_fd = -1; static bool maybe_executable (const char *filename) { - /* Woe32 lacks the access() function, but Cygwin doesn't. */ -#if !(defined WIN32_NATIVE && !defined __CYGWIN__) + /* Woe32 lacks the access() function. */ +#if !(defined WIN32_NATIVE) if (access (filename, X_OK) < 0) return false; -#ifdef __linux__ +#if defined __linux__ || defined __CYGWIN__ if (executable_fd >= 0) { /* If we already have an executable_fd, check that filename points to @@ -136,7 +136,7 @@ maybe_executable (const char *filename) static char * find_executable (const char *argv0) { -#if defined WIN32_NATIVE || defined __CYGWIN__ +#if defined WIN32_NATIVE char location[MAX_PATH]; int length = GetModuleFileName (NULL, location, sizeof (location)); if (length < 0) @@ -144,36 +144,13 @@ find_executable (const char *argv0) if (!IS_PATH_WITH_DIR (location)) /* Shouldn't happen. */ return NULL; - { -#if defined __CYGWIN__ - /* cygwin-1.5.13 (2005-03-01) or newer would also allow a Linux-like - implementation: readlink of "/proc/self/exe". But using the - result of the Win32 system call is simpler and is consistent with the - code in relocatable.c. */ - /* On Cygwin, we need to convert paths coming from Win32 system calls - to the Unix-like slashified notation. 
*/ - static char location_as_posix_path[2 * MAX_PATH]; - /* There's no error return defined for cygwin_conv_to_posix_path. - See cygwin-api/func-cygwin-conv-to-posix-path.html. - Does it overflow the buffer of expected size MAX_PATH or does it - truncate the path? I don't know. Let's catch both. */ - cygwin_conv_to_posix_path (location, location_as_posix_path); - location_as_posix_path[MAX_PATH - 1] = '\0'; - if (strlen (location_as_posix_path) >= MAX_PATH - 1) - /* A sign of buffer overflow or path truncation. */ - return NULL; - /* Call canonicalize_file_name, because Cygwin supports symbolic links. */ - return canonicalize_file_name (location_as_posix_path); -#else - return xstrdup (location); -#endif - } -#else /* Unix && !Cygwin */ -#ifdef __linux__ - /* The executable is accessible as /proc/<pid>/exe. In newer Linux - versions, also as /proc/self/exe. Linux >= 2.1 provides a symlink - to the true pathname; older Linux versions give only device and ino, - enclosed in brackets, which we cannot use here. */ + return xstrdup (location); +#else /* Unix */ +#if defined __linux__ || defined __CYGWIN__ + /* The executable is accessible as /proc/<pid>/exe. In Cygwin and in + newer Linux versions, also as /proc/self/exe. Linux >= 2.1 provides + a symlink to the true pathname; older Linux versions give only device + and ino, enclosed in brackets, which we cannot use here. */ { char *link; --- libiconv-1.13.1.orig/srclib/stdio-write.c 2009-06-21 13:31:08.000000000 +0200 +++ libiconv-1.13.1/srclib/stdio-write.c 2011-01-27 11:38:26.831997673 +0100 @@ -29,7 +29,7 @@ error EINVAL. This write() function is at the basis of the function which flushes the buffer of a FILE stream. */ -# if (defined _WIN32 || defined __WIN32__) && ! 
defined __CYGWIN__ +# if defined _WIN32 || defined __WIN32__ # include <errno.h> # include <signal.h> --- libiconv-1.13.1.orig/srclib/relocatable.c 2009-06-21 13:31:08.000000000 +0200 +++ libiconv-1.13.1/srclib/relocatable.c 2011-01-27 11:38:19.852491486 +0100 @@ -43,7 +43,7 @@ # include "xalloc.h" #endif -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ # define WIN32_LEAN_AND_MEAN # include <windows.h> #endif @@ -70,8 +70,8 @@ ISSLASH(C) tests whether C is a directory separator character. IS_PATH_WITH_DIR(P) tests whether P contains a directory specification. */ -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__ - /* Win32, Cygwin, OS/2, DOS */ +#if defined _WIN32 || defined __WIN32__ || defined __EMX__ || defined __DJGPP__ + /* Win32, OS/2, DOS */ # define ISSLASH(C) ((C) == '/' || (C) == '\\') # define HAS_DEVICE(P) \ ((((P)[0] >= 'A' && (P)[0] <= 'Z') || ((P)[0] >= 'a' && (P)[0] <= 'z')) \ @@ -290,7 +290,7 @@ compute_curr_prefix (const char *orig_in /* Full pathname of shared library, or NULL. */ static char *shared_library_fullname; -#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ /* Determine the full pathname of the shared library when it is loaded. */ @@ -312,31 +312,13 @@ DllMain (HINSTANCE module_handle, DWORD /* Shouldn't happen. */ return FALSE; - { -#if defined __CYGWIN__ - /* On Cygwin, we need to convert paths coming from Win32 system calls - to the Unix-like slashified notation. */ - static char location_as_posix_path[2 * MAX_PATH]; - /* There's no error return defined for cygwin_conv_to_posix_path. - See cygwin-api/func-cygwin-conv-to-posix-path.html. - Does it overflow the buffer of expected size MAX_PATH or does it - truncate the path? I don't know. Let's catch both. 
*/ - cygwin_conv_to_posix_path (location, location_as_posix_path); - location_as_posix_path[MAX_PATH - 1] = '\0'; - if (strlen (location_as_posix_path) >= MAX_PATH - 1) - /* A sign of buffer overflow or path truncation. */ - return FALSE; - shared_library_fullname = strdup (location_as_posix_path); -#else - shared_library_fullname = strdup (location); -#endif - } + shared_library_fullname = strdup (location); } return TRUE; } -#else /* Unix except Cygwin */ +#else /* Unix */ static void find_shared_library_fullname () @@ -387,7 +369,7 @@ find_shared_library_fullname () #endif } -#endif /* (WIN32 or Cygwin) / (Unix except Cygwin) */ +#endif /* WIN32 / Unix */ /* Return the full pathname of the current shared library. Return NULL if unknown. @@ -395,7 +377,7 @@ find_shared_library_fullname () static char * get_shared_library_fullname () { -#if !(defined _WIN32 || defined __WIN32__ || defined __CYGWIN__) +#if !(defined _WIN32 || defined __WIN32__ ) static bool tried_find_shared_library_fullname; if (!tried_find_shared_library_fullname) { --- libiconv-1.13.1.orig/srclib/sigprocmask.c 2009-06-21 13:31:08.000000000 +0200 +++ libiconv-1.13.1/srclib/sigprocmask.c 2011-01-27 11:37:56.450147229 +0100 @@ -46,7 +46,7 @@ /* On native Windows, as of 2008, the signal SIGABRT_COMPAT is an alias for the signal SIGABRT. Only one signal handler is stored for both SIGABRT and SIGABRT_COMPAT. SIGABRT_COMPAT is not a signal of its own. */ -#if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__ +#if defined _WIN32 || defined __WIN32__ # undef SIGABRT_COMPAT # define SIGABRT_COMPAT 6 #endif -- Corinna Vinschen Please, send mails regarding Cygwin to Cygwin Project Co-Leader cygwin AT cygwin DOT com Red Hat -- Problem reports: http://cygwin.com/problems.html FAQ: http://cygwin.com/faq/ Documentation: http://cygwin.com/docs.html Unsubscribe info: http://cygwin.com/ml/#unsubscribe-simple