Hiroshi Inoue wrote:
> >>>> I need someone with WIN32 experience to review and test this patch.
> >>> I don't understand why cache_locale_time() works on Windows.  It sets
> >>> the LC_CTYPE but does not do any encoding conversion.
> >> Doesn't strftime_win32 do the conversion?
> > 
> > Oh, I now see strftime is redefined as a macro in that C file.  Thanks.
> > 
> >>> Do month and
> >>> day-of-week names not work either, or do they work and the encoding
> >>> conversion for numeric/money, e.g. Euro, is not necessary?
> >> db_strdup does the conversion.
> > 
> > Should we pull the encoding conversion into a separate function and have
> > strftime_win32() and db_strdup() both call it?
> 
> We may be able to pull the conversion WideChars => UTF8 =>
> a PG encoding into a function.

OK, I have created a new function, win32_wchar_to_db_encoding(), to
share the conversion from wide characters to the database encoding.
New patch attached.

> BTW both PGLC_localeconv() and cache_locale_time() save the current
>   LC_CTYPE first and restore it just before returning from the functions.
> I'm suspicious if it's OK when errors occur in the middle of the functions.

Yea, I added a comment questioning if that is a problem.

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  PG East:  http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v
retrieving revision 1.53
diff -c -c -r1.53 pg_locale.c
*** src/backend/utils/adt/pg_locale.c	27 Feb 2010 20:20:44 -0000	1.53
--- src/backend/utils/adt/pg_locale.c	2 Mar 2010 18:11:41 -0000
***************
*** 4,10 ****
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
--- 4,10 ----
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
***************
*** 96,101 ****
--- 96,109 ----
  static char *IsoLocaleName(const char *);		/* MSVC specific */
  #endif
  
+ #ifdef WIN32
+ static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf,
+ 								const size_t wchars, char *dst, size_t dstlen);
+ static char *db_encoding_strdup(const char *item, const char *str);
+ static size_t strftime_win32(char *dst, size_t dstlen, const wchar_t *format,
+ 							 const struct tm *tm);
+ #endif
+ 
  
  /*
   * pg_perm_setlocale
***************
*** 387,392 ****
--- 395,488 ----
  }
  
  
+ #ifdef	WIN32
+ /*
+  *	Convert a wide character string (UTF16 on Win32) to UTF8, and then,
+  *	unless the database encoding is UTF8, to the database encoding.
+  *	wbuf stays owned by the caller (it may be a stack array), so it is not
+  *	freed here.  On return dst is NUL-terminated; returns pg_mbstrlen(dst).
+  */
+ static size_t win32_wchar_to_db_encoding(const wchar_t *wbuf,
+ 								const size_t wchars, char *dst, size_t dstlen)
+ {
+ 	int	db_encoding = GetDatabaseEncoding();
+ 	int	utf8len;
+ 
+ 	/* Convert to UTF8, keeping one byte of dst free for the terminator */
+ 	utf8len = WideCharToMultiByte(CP_UTF8, 0, wbuf, (int) wchars, dst, (int) dstlen - 1, NULL, NULL);
+ 	if (utf8len <= 0 || (size_t) utf8len >= dstlen)
+ 		/* Does this leave LC_CTYPE set incorrectly? */
+ 		elog(ERROR,
+ 			"could not convert string %04x to UTF-8: error %lu", wbuf[0], GetLastError());
+ 
+ 	dst[utf8len] = '\0';
+ 	if (db_encoding != PG_UTF8)
+ 	{
+ 		PG_TRY();
+ 		{
+ 			char *convstr = pg_do_encoding_conversion(dst, utf8len, PG_UTF8, db_encoding);
+ 			if (dst != convstr)
+ 			{
+ 				strlcpy(dst, convstr, dstlen);
+ 				pfree(convstr);
+ 			}
+ 		}
+ 		PG_CATCH();
+ 		{
+ 			FlushErrorState();
+ 			dst[0] = '\0';
+ 		}
+ 		PG_END_TRY();
+ 	}
+ 
+ 	return pg_mbstrlen(dst);
+ }
+ 
+ /*
+  *	Return a malloc'd copy of str (LC_CTYPE-encoded output of the locale
+  *	routines) converted to the database encoding; item is currently unused.
+  */
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+ 	int	db_encoding = GetDatabaseEncoding();
+ 	size_t	wchars, ilen, dstlen;
+ 	int	bytes_per_char;
+ 	wchar_t	*wbuf;
+ 	char	*tmp, *dst;
+ 
+ 	if (!str[0])
+ 		return strdup(str);
+ 
+ 	/* Convert multi-byte string using current LC_CTYPE to a wide-character string */
+ 	ilen = strlen(str) + 1;
+ 	wbuf = (wchar_t *) palloc(ilen * sizeof(wchar_t));
+ 	wchars = mbstowcs(wbuf, str, ilen);
+ 	if (wchars == (size_t) -1)
+ 		elog(ERROR,
+ 			"could not convert string to wide characters: error %lu", GetLastError());
+ 
+ 	/* Convert in palloc'd scratch space so an ERROR cannot leak malloc'd memory */
+ 	bytes_per_char = pg_encoding_max_length(PG_UTF8);
+ 	if (pg_encoding_max_length(db_encoding) > bytes_per_char)
+ 		bytes_per_char = pg_encoding_max_length(db_encoding);
+ 	dstlen = wchars * bytes_per_char + 1;
+ 	tmp = (char *) palloc(dstlen);
+ 	win32_wchar_to_db_encoding(wbuf, wchars, tmp, dstlen);
+ 	pfree(wbuf);				/* we allocated it, so we release it */
+ 	/* Hand back a malloc'd copy, matching strdup() in the non-WIN32 version */
+ 	dst = strdup(tmp);
+ 	pfree(tmp);
+ 	if (dst == NULL)
+ 		elog(ERROR, "could not allocate a destination buffer");
+ 	return dst;
+ }
+ #else
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+ 	return strdup(str);
+ }
+ #endif /* WIN32 */
+ 
  /*
   * Return the POSIX lconv struct (contains number/money formatting
   * information) with locale information for all categories.
***************
*** 398,403 ****
--- 494,502 ----
  	struct lconv *extlconv;
  	char	   *save_lc_monetary;
  	char	   *save_lc_numeric;
+ #ifdef	WIN32
+ 	char	   *save_lc_ctype = NULL;
+ #endif
  
  	/* Did we do it already? */
  	if (CurrentLocaleConvValid)
***************
*** 413,442 ****
  	if (save_lc_numeric)
  		save_lc_numeric = pstrdup(save_lc_numeric);
  
  	setlocale(LC_MONETARY, locale_monetary);
  	setlocale(LC_NUMERIC, locale_numeric);
! 
! 	/* Get formatting information */
  	extlconv = localeconv();
  
  	/*
! 	 * Must copy all values since restoring internal settings may overwrite
  	 * localeconv()'s results.
  	 */
  	CurrentLocaleConv = *extlconv;
! 	CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol);
! 	CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point);
! 	CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! 	CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep);
! 	CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
! 	CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
  	CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! 	CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
! 	CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign);
! 	CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign);
  	CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
  
! 	/* Try to restore internal settings */
  	if (save_lc_monetary)
  	{
  		setlocale(LC_MONETARY, save_lc_monetary);
--- 512,588 ----
  	if (save_lc_numeric)
  		save_lc_numeric = pstrdup(save_lc_numeric);
  
+ #ifdef	WIN32
+ 	/*
+ 	 *	Ideally, the db server encoding and locale settings would
+ 	 *	always match.  Unfortunately, WIN32 does not support UTF-8
+ 	 *	values for setlocale(), even though PostgreSQL runs fine with
+ 	 *	a UTF-8 encoding on Windows:
+ 	 *
+ 	 *		http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
+ 	 *
+ 	 *	Therefore, we must set LC_CTYPE to match LC_NUMERIC and 
+ 	 *	LC_MONETARY, call localeconv(), and use mbstowcs() to
+ 	 *	convert the locale-aware string, e.g. Euro symbol, which
+ 	 *	is not in UTF-8 to the server encoding.
+ 	 */
+ 
+ 	if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL)
+ 	{
+ 		save_lc_ctype = pstrdup(save_lc_ctype);
+ 		/* Set LC_CTYPE to match LC_MONETARY? */
+ 		if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0)
+ 			setlocale(LC_CTYPE, locale_monetary);
+ 	}
+ #endif
+ 
  	setlocale(LC_MONETARY, locale_monetary);
  	setlocale(LC_NUMERIC, locale_numeric);
! 	/*
! 	 *	Get formatting information for LC_MONETARY, and LC_NUMERIC if they
! 	 *	are the same.
! 	 */
  	extlconv = localeconv();
  
  	/*
! 	 * Must copy all values since restoring internal settings might overwrite
  	 * localeconv()'s results.
  	 */
  	CurrentLocaleConv = *extlconv;
! 
! 	/* The first argument of db_encoding_strdup() is only used on WIN32 */
! 	CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol);
! 	CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol);
! 	CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point);
  	CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! 	CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep);
! 	CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign);
! 	CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign);
  	CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
  
! #ifdef	WIN32
! 	if (save_lc_ctype && pg_strcasecmp(locale_numeric, locale_monetary) != 0)
! 	{
! 		setlocale(LC_CTYPE, locale_numeric);
! 		/* Get formatting information for LC_NUMERIC with matching LC_CTYPE */
! 		extlconv = localeconv();
! 	}
! #endif
! 
! 	CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point);
! 	CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! 	CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep);
! 
! 	/*
! 	 *	Restore internal settings
! 	 */
! #ifdef	WIN32
! 	if (save_lc_ctype)
! 	{
! 		setlocale(LC_CTYPE, save_lc_ctype);
! 		pfree(save_lc_ctype);
! 	}
! #endif
  	if (save_lc_monetary)
  	{
  		setlocale(LC_MONETARY, save_lc_monetary);
***************
*** 455,483 ****
  
  #ifdef WIN32
  /*
!  * On win32, strftime() returns the encoding in CP_ACP, which is likely
!  * different from SERVER_ENCODING. This is especially important in Japanese
!  * versions of Windows which will use SJIS encoding, which we don't support
!  * as a server encoding.
!  *
!  * Replace strftime() with a version that gets the string in UTF16 and then
!  * converts it to the appropriate encoding as necessary.
   *
   * Note that this only affects the calls to strftime() in this file, which are
   * used to get the locale-aware strings. Other parts of the backend use
   * pg_strftime(), which isn't locale-aware and does not need to be replaced.
   */
  static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm * tm)
  {
! 	size_t		len;
  	wchar_t		wbuf[MAX_L10N_DATA];
- 	int			encoding;
  
! 	encoding = GetDatabaseEncoding();
! 
! 	len = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
! 	if (len == 0)
  
  		/*
  		 * strftime call failed - return 0 with the contents of dst
--- 601,628 ----
  
  #ifdef WIN32
  /*
!  * On WIN32, strftime() returns the encoding in CP_ACP (the default
!  * operating system codepage for that computer), which is likely different
!  * from SERVER_ENCODING.  This is especially important in Japanese versions
!  * of Windows which will use SJIS encoding, which we don't support as a
!  * server encoding.
!  *
!  * So, instead of using strftime(), use wcsftime() to return the value in
!  * wide characters (internally UTF16) and then convert it to the appropriate
!  * database encoding.
   *
   * Note that this only affects the calls to strftime() in this file, which are
   * used to get the locale-aware strings. Other parts of the backend use
   * pg_strftime(), which isn't locale-aware and does not need to be replaced.
   */
  static size_t
! strftime_win32(char *dst, size_t dstlen, const wchar_t *format, const struct tm *tm)
  {
! 	size_t		wchars;
  	wchar_t		wbuf[MAX_L10N_DATA];
  
! 	wchars = wcsftime(wbuf, MAX_L10N_DATA, format, tm);
! 	if (wchars == 0)
  
  		/*
  		 * strftime call failed - return 0 with the contents of dst
***************
*** 485,511 ****
  		 */
  		return 0;
  
! 	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen, NULL, NULL);
! 	if (len == 0)
! 		elog(ERROR,
! 			 "could not convert string to UTF-8:error %lu", GetLastError());
! 
! 	dst[len] = '\0';
! 	if (encoding != PG_UTF8)
! 	{
! 		char	   *convstr = pg_do_encoding_conversion(dst, len, PG_UTF8, encoding);
! 
! 		if (dst != convstr)
! 		{
! 			strlcpy(dst, convstr, dstlen);
! 			len = strlen(dst);
! 		}
! 	}
! 
! 	return len;
  }
  
  #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
  #endif   /* WIN32 */
  
  
--- 630,641 ----
  		 */
  		return 0;
  
! 	return win32_wchar_to_db_encoding(wbuf, wchars, dst, dstlen);
  }
  
+ /* redefine strftime() */
  #define strftime(a,b,c,d) strftime_win32(a,b,L##c,d)
+ 
  #endif   /* WIN32 */
  
  
***************
*** 533,542 ****
  	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
  
  #ifdef WIN32
! 	/* set user's value of ctype locale */
  	save_lc_ctype = setlocale(LC_CTYPE, NULL);
  	if (save_lc_ctype)
  		save_lc_ctype = pstrdup(save_lc_ctype);
  
  	setlocale(LC_CTYPE, locale_time);
  #endif
--- 663,674 ----
  	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
  
  #ifdef WIN32
! 	/* See the WIN32 comment near the top of PGLC_localeconv() */
  	save_lc_ctype = setlocale(LC_CTYPE, NULL);
  	if (save_lc_ctype)
  		save_lc_ctype = pstrdup(save_lc_ctype);
+ 	else
+ 		save_lc_ctype = pstrdup("");
  
  	setlocale(LC_CTYPE, locale_time);
  #endif
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to