Hiroshi Inoue wrote:
> Bruce Momjian wrote:
> > Hiroshi Inoue wrote:
> >> Bruce Momjian wrote:
> >>> Where are we on this issue?
> >> Oops I forgot it completely.
> >> I have a slightly improved version and will post it tonight.
> > 
> > Ah, very good.  Thanks.
> 
> Attached is an improved version.

I spent many hours on this patch and am attaching an updated version.
I have restructured the code and added many comments, but this is the
main one:

        *  Ideally, the server encoding and locale settings would
        *  always match.  Unfortunately, WIN32 does not support UTF-8
        *  values for setlocale(), even though PostgreSQL runs fine with
        *  a UTF-8 encoding on Windows:
        *
        *      http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
        *
        *  Therefore, we must set LC_CTYPE to match LC_NUMERIC and
        *  LC_MONETARY, call localeconv(), and use mbstowcs() to
        *  convert the locale-encoded strings (e.g. the Euro symbol),
        *  which are not in UTF-8, to the server encoding.

I need someone with WIN32 experience to review and test this patch.

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  PG East:  http://www.enterprisedb.com/community/nav-pg-east-2010.do
Index: src/backend/utils/adt/pg_locale.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/pg_locale.c,v
retrieving revision 1.53
diff -c -c -r1.53 pg_locale.c
*** src/backend/utils/adt/pg_locale.c	27 Feb 2010 20:20:44 -0000	1.53
--- src/backend/utils/adt/pg_locale.c	28 Feb 2010 03:59:14 -0000
***************
*** 4,10 ****
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.53 2010/02/27 20:20:44 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
--- 4,10 ----
   *
   * Portions Copyright (c) 2002-2010, PostgreSQL Global Development Group
   *
!  * $PostgreSQL: pgsql/src/backend/utils/adt/pg_locale.c,v 1.51 2010/01/02 16:57:54 momjian Exp $
   *
   *-----------------------------------------------------------------------
   */
***************
*** 386,391 ****
--- 386,482 ----
  		free(s->positive_sign);
  }
  
+ #ifdef	WIN32
+ /*
+  *	db_encoding_strdup
+  *
+  *	Convert the LC_CTYPE-encoded string returned from the locale
+  *	routines to the database encoding.
+  *
+  *	item names the lconv field being converted and is used only in
+  *	error messages.  str is decoded by mbstowcs() using the current
+  *	LC_CTYPE, so the caller must set LC_CTYPE to match the locale that
+  *	produced str before calling.
+  *
+  *	The result is obtained from malloc() or strdup(), so it is released
+  *	with free().  If the UTF-8 form cannot be converted to the database
+  *	encoding, the error is discarded and an empty string is returned.
+  */
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+ 	int	db_encoding = GetDatabaseEncoding();
+ 	size_t	wchars, ilen, wclen, dstlen;
+ 	int	utflen, bytes_per_char;
+ 	wchar_t	*wbuf;
+ 	char	*dst;
+ 
+ 	if (!str[0])
+ 		return strdup(str);
+ 	ilen = strlen(str) + 1;
+ 	wclen = ilen * sizeof(wchar_t);
+ 	wbuf = (wchar_t *) palloc(wclen);
+ 
+ 	/*
+ 	 *	Convert multi-byte string using current LC_CTYPE to a
+ 	 *	wide-character string.  mbstowcs() is a C runtime function that
+ 	 *	reports failure through errno, not GetLastError(), hence %m.
+ 	 */
+ 	wchars = mbstowcs(wbuf, str, ilen);
+ 	if (wchars == (size_t) -1)
+ 		elog(ERROR,
+ 			"could not convert %s string to wide characters: %m", item);
+ 
+ 	/* allocate target string, sized for the wider of UTF-8 and the database encoding */
+ 	bytes_per_char = pg_encoding_max_length(PG_UTF8);
+ 	if (pg_encoding_max_length(db_encoding) > bytes_per_char)
+ 		bytes_per_char = pg_encoding_max_length(db_encoding);
+ 	dstlen = wchars * bytes_per_char + 1;
+ 	if ((dst = malloc(dstlen)) == NULL)
+ 		elog(ERROR, "could not allocate a destination buffer");
+ 
+ 	/* Convert wide string to UTF8 */
+ 	utflen = WideCharToMultiByte(CP_UTF8, 0, wbuf, wchars, dst, dstlen, NULL, NULL);
+ 	if (utflen == 0)
+ 	{
+ 		/* Save the error code before calling free(); dst must not leak on error */
+ 		unsigned long	winerr = GetLastError();
+ 
+ 		free(dst);
+ 		elog(ERROR,
+ 			"could not convert %s string %04x to UTF-8: error %lu", item, wbuf[0], winerr);
+ 	}
+ 	pfree(wbuf);
+ 
+ 	dst[utflen] = '\0';
+ 	if (db_encoding != PG_UTF8)
+ 	{
+ 		/* A conversion error is discarded and an empty string returned instead */
+ 		PG_TRY();
+ 		{
+ 			char *convstr = pg_do_encoding_conversion(dst, utflen, PG_UTF8, db_encoding);
+ 			if (dst != convstr)
+ 			{
+ 				strlcpy(dst, convstr, dstlen);
+ 				pfree(convstr);
+ 			}
+ 		}
+ 		PG_CATCH();
+ 		{
+ 			FlushErrorState();
+ 			dst[0] = '\0';
+ 		}
+ 		PG_END_TRY();
+ 	}
+ 
+ 	return dst;
+ }
+ #else
+ /* Without WIN32 no conversion is done; item is unused and str is simply duplicated */
+ static char *db_encoding_strdup(const char *item, const char *str)
+ {
+ 	return strdup(str);
+ }
+ #endif /* WIN32 */
  
  /*
   * Return the POSIX lconv struct (contains number/money formatting
***************
*** 398,403 ****
--- 466,475 ----
  	struct lconv *extlconv;
  	char	   *save_lc_monetary;
  	char	   *save_lc_numeric;
+ #ifdef	WIN32
+ 	char	   *save_lc_ctype = NULL;
+ 	bool		lc_ctype_was_null = false;
+ #endif
  
  	/* Did we do it already? */
  	if (CurrentLocaleConvValid)
***************
*** 413,442 ****
  	if (save_lc_numeric)
  		save_lc_numeric = pstrdup(save_lc_numeric);
  
  	setlocale(LC_MONETARY, locale_monetary);
  	setlocale(LC_NUMERIC, locale_numeric);
! 
! 	/* Get formatting information */
  	extlconv = localeconv();
  
  	/*
! 	 * Must copy all values since restoring internal settings may overwrite
  	 * localeconv()'s results.
  	 */
  	CurrentLocaleConv = *extlconv;
! 	CurrentLocaleConv.currency_symbol = strdup(extlconv->currency_symbol);
! 	CurrentLocaleConv.decimal_point = strdup(extlconv->decimal_point);
! 	CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! 	CurrentLocaleConv.thousands_sep = strdup(extlconv->thousands_sep);
! 	CurrentLocaleConv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
! 	CurrentLocaleConv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
  	CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! 	CurrentLocaleConv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
! 	CurrentLocaleConv.negative_sign = strdup(extlconv->negative_sign);
! 	CurrentLocaleConv.positive_sign = strdup(extlconv->positive_sign);
  	CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
  
! 	/* Try to restore internal settings */
  	if (save_lc_monetary)
  	{
  		setlocale(LC_MONETARY, save_lc_monetary);
--- 485,564 ----
  	if (save_lc_numeric)
  		save_lc_numeric = pstrdup(save_lc_numeric);
  
+ #ifdef	WIN32
+ 	/*
+ 	 *	Ideally, the server encoding and locale settings would
+ 	 *	always match.  Unfortunately, WIN32 does not support UTF-8
+ 	 *	values for setlocale(), even though PostgreSQL runs fine with
+ 	 *	a UTF-8 encoding on Windows:
+ 	 *
+ 	 *		http://msdn.microsoft.com/en-us/library/x99tb11d.aspx
+ 	 *
+ 	 *	Therefore, we must set LC_CTYPE to match LC_NUMERIC and
+ 	 *	LC_MONETARY, call localeconv(), and use mbstowcs() to
+ 	 *	convert the locale-encoded strings (e.g. the Euro symbol),
+ 	 *	which are not in UTF-8, to the server encoding.
+ 	 */
+ 
+ 	/*
+ 	 *	We unconditionally restore LC_CTYPE because we are setting it
+ 	 *	to an unusual value.
+ 	 */
+ 	if ((save_lc_ctype = setlocale(LC_CTYPE, NULL)) != NULL)
+ 		save_lc_ctype = pstrdup(save_lc_ctype);
+ 	else
+ 		/* This is actually the C locale */
+ 		save_lc_ctype = pstrdup("");
+ 
+ 	/* Set LC_CTYPE to match LC_MONETARY? */
+ 	if (pg_strcasecmp(save_lc_ctype, locale_monetary) != 0)
+ 		setlocale(LC_CTYPE, locale_monetary);
+ #endif
+ 
  	setlocale(LC_MONETARY, locale_monetary);
  	setlocale(LC_NUMERIC, locale_numeric);
! 	/*
! 	 *	Get formatting information for LC_MONETARY, and LC_NUMERIC if they
! 	 *	are the same.
! 	 */
  	extlconv = localeconv();
  
  	/*
! 	 * Must copy all values since restoring internal settings might overwrite
  	 * localeconv()'s results.
  	 */
  	CurrentLocaleConv = *extlconv;
! 
! 	/* The first argument of db_encoding_strdup() is only used on WIN32 */
! 	CurrentLocaleConv.currency_symbol = db_encoding_strdup("currency_symbol", extlconv->currency_symbol);
! 	CurrentLocaleConv.int_curr_symbol = db_encoding_strdup("int_curr_symbol", extlconv->int_curr_symbol);
! 	CurrentLocaleConv.mon_decimal_point = db_encoding_strdup("mon_decimal_point", extlconv->mon_decimal_point);
  	CurrentLocaleConv.mon_grouping = strdup(extlconv->mon_grouping);
! 	CurrentLocaleConv.mon_thousands_sep = db_encoding_strdup("mon_thousands_sep", extlconv->mon_thousands_sep);
! 	CurrentLocaleConv.negative_sign = db_encoding_strdup("negative_sign", extlconv->negative_sign);
! 	CurrentLocaleConv.positive_sign = db_encoding_strdup("positive_sign", extlconv->positive_sign);
  	CurrentLocaleConv.n_sign_posn = extlconv->n_sign_posn;
  
! #ifdef	WIN32
! 	if (pg_strcasecmp(locale_numeric, locale_monetary) != 0)
! 	{
! 		setlocale(LC_CTYPE, locale_numeric);
! 		/* Get formatting information for LC_NUMERIC with matching LC_CTYPE */
! 		extlconv = localeconv();
! 	}
! #endif
! 
! 	CurrentLocaleConv.decimal_point = db_encoding_strdup("decimal_point", extlconv->decimal_point);
! 	CurrentLocaleConv.grouping = strdup(extlconv->grouping);
! 	CurrentLocaleConv.thousands_sep = db_encoding_strdup("thousands_sep", extlconv->thousands_sep);
! 
! 	/*
! 	 *	Restore internal settings
! 	 */
! #ifdef	WIN32
! 	setlocale(LC_CTYPE, save_lc_ctype);
! 	pfree(save_lc_ctype);
! #endif
  	if (save_lc_monetary)
  	{
  		setlocale(LC_MONETARY, save_lc_monetary);
***************
*** 533,542 ****
  	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
  
  #ifdef WIN32
! 	/* set user's value of ctype locale */
  	save_lc_ctype = setlocale(LC_CTYPE, NULL);
  	if (save_lc_ctype)
  		save_lc_ctype = pstrdup(save_lc_ctype);
  
  	setlocale(LC_CTYPE, locale_time);
  #endif
--- 655,666 ----
  	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
  
  #ifdef WIN32
! 	/* See the WIN32 comment near the top of PGLC_localeconv() */
  	save_lc_ctype = setlocale(LC_CTYPE, NULL);
  	if (save_lc_ctype)
  		save_lc_ctype = pstrdup(save_lc_ctype);
+ 	else
+ 		save_lc_ctype = pstrdup("");
  
  	setlocale(LC_CTYPE, locale_time);
  #endif
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to