Re: [PATCHES] Simplify formatting.c

Bruce Momjian Mon, 23 Jun 2008 12:29:00 -0700

Bruce Momjian wrote:
> > I am starting to think that the simplest approach is to keep the
> > single-copy version in there for single-byte encodings and not worry
> > about the overhead of the multi-byte case.
> 
> My new idea is if we pass the length to str_initcap, we can eliminate
> the string copy from text to char *.  That leaves us with just one extra
> string copy from char * to text, which seems acceptable.  We still have
> the wide char copy but I don't see any easy way to eliminate that
> because the multi-byte code is complex and not something we want to
> duplicate.


I ended up going in this direction, and did the same for upper and
lower.  Patch attached and applied.  I don't see any other cleanups
needed in this area.

-- 
  Bruce Momjian  <[EMAIL PROTECTED]>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

Index: src/backend/utils/adt/formatting.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/formatting.c,v
retrieving revision 1.142
diff -c -c -r1.142 formatting.c
*** src/backend/utils/adt/formatting.c	17 Jun 2008 16:09:06 -0000	1.142
--- src/backend/utils/adt/formatting.c	23 Jun 2008 19:24:35 -0000
***************
*** 925,933 ****
  static char *str_numth(char *dest, char *num, int type);
  static int	strspace_len(char *str);
  static int	strdigits_len(char *str);
- static char *str_toupper(char *buff);
- static char *str_tolower(char *buff);
- static char *str_initcap(char *buff);
  
  static int	seq_search(char *name, char **array, int type, int max, int *len);
  static void do_to_timestamp(text *date_txt, text *fmt,
--- 925,930 ----
***************
*** 1424,1435 ****
  	return dest;
  }
  
  /* ----------
!  * Convert string to upper case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_toupper(char *buff)
  {
  	char		*result;
  
--- 1421,1444 ----
  	return dest;
  }
  
+ /*
+  * If the system provides the needed functions for wide-character manipulation
+  * (which are all standardized by C99), then we implement upper/lower/initcap
+  * using wide-character functions, if necessary.  Otherwise we use the
+  * traditional <ctype.h> functions, which of course will not work as desired
+  * in multibyte character sets.  Note that in either case we are effectively
+  * assuming that the database character encoding matches the encoding implied
+  * by LC_CTYPE.
+  */
+ 
  /* ----------
!  * wide-character-aware lower function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_tolower(char *buff, size_t nbytes)
  {
  	char		*result;
  
***************
*** 1438,1464 ****
  
  #ifdef USE_WIDE_UPPER_LOWER
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 		result = wstring_upper(buff);
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pstrdup(buff);
  
  		for (p = result; *p; p++)
! 			*p = pg_toupper((unsigned char) *p);
  	}
  
  	return result;
  }
  
  /* ----------
!  * Convert string to lower case. It is designed to be multibyte-aware.
   * ----------
   */
! static char *
! str_tolower(char *buff)
  {
  	char		*result;
  
--- 1447,1492 ----
  
  #ifdef USE_WIDE_UPPER_LOWER
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		wchar_t		*workspace;
! 		int			curr_char = 0;
! 
! 		/* Output workspace cannot have more codes than input bytes */
! 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
! 
! 		char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
! 
! 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
! 			workspace[curr_char] = towlower(workspace[curr_char]);
! 
! 		/* Make result large enough; case change might change number of bytes */
! 		result = palloc(curr_char * MB_CUR_MAX + 1);
! 
! 		wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
! 		pfree(workspace);
! 	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pnstrdup(buff, nbytes);
  
  		for (p = result; *p; p++)
! 			*p = pg_tolower((unsigned char) *p);
  	}
  
  	return result;
  }
  
  /* ----------
!  * wide-character-aware upper function
!  * We pass the number of bytes so we can pass varlena and char*
!  * to this function.
   * ----------
   */
! char *
! str_toupper(char *buff, size_t nbytes)
  {
  	char		*result;
  
***************
*** 1467,1493 ****
  
  #ifdef USE_WIDE_UPPER_LOWER
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 		result = wstring_lower(buff);
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pstrdup(buff);
  
  		for (p = result; *p; p++)
! 			*p = pg_tolower((unsigned char) *p);
  	}
  
  	return result;
  }
!   
  /* ----------
   * wide-character-aware initcap function
   * ----------
   */
! static char *
! str_initcap(char *buff)
  {
  	char		*result;
  	bool		wasalnum = false;
--- 1495,1540 ----
  
  #ifdef USE_WIDE_UPPER_LOWER
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		wchar_t		*workspace;
! 		int			curr_char = 0;
! 
! 		/* Output workspace cannot have more codes than input bytes */
! 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
! 
! 		char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
! 
! 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
! 			workspace[curr_char] = towupper(workspace[curr_char]);
! 
! 		/* Make result large enough; case change might change number of bytes */
! 		result = palloc(curr_char * MB_CUR_MAX + 1);
! 
! 		wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
! 		pfree(workspace);
! 	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pnstrdup(buff, nbytes);
  
  		for (p = result; *p; p++)
! 			*p = pg_toupper((unsigned char) *p);
  	}
  
  	return result;
  }
! 
  /* ----------
   * wide-character-aware initcap function
+  * We pass the number of bytes so we can pass varlena and char*
+  * to this function.
   * ----------
   */
! char *
! str_initcap(char *buff, size_t nbytes)
  {
  	char		*result;
  	bool		wasalnum = false;
***************
*** 1499,1533 ****
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
  	{
  		wchar_t		*workspace;
! 		text		*in_text;
! 		text		*out_text;
! 		int			i;
  
! 		in_text = cstring_to_text(buff);
! 		workspace = texttowcs(in_text);
  
! 		for (i = 0; workspace[i] != 0; i++)
  		{
  			if (wasalnum)
! 				workspace[i] = towlower(workspace[i]);
  			else
! 				workspace[i] = towupper(workspace[i]);
! 			wasalnum = iswalnum(workspace[i]);
  		}
  
! 		out_text = wcstotext(workspace, i);
! 		result = text_to_cstring(out_text);
  
  		pfree(workspace);
- 		pfree(in_text);
- 		pfree(out_text);
  	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pstrdup(buff);
  
  		for (p = result; *p; p++)
  		{
--- 1546,1579 ----
  	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
  	{
  		wchar_t		*workspace;
! 		int			curr_char = 0;
! 
! 		/* Output workspace cannot have more codes than input bytes */
! 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
  
! 		char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
  
! 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
  		{
  			if (wasalnum)
! 				workspace[curr_char] = towlower(workspace[curr_char]);
  			else
! 				workspace[curr_char] = towupper(workspace[curr_char]);
! 			wasalnum = iswalnum(workspace[curr_char]);
  		}
  
! 		/* Make result large enough; case change might change number of bytes */
! 		result = palloc(curr_char * MB_CUR_MAX + 1);
  
+ 		wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
  		pfree(workspace);
  	}
  	else
  #endif		/* USE_WIDE_UPPER_LOWER */
  	{
  		char *p;
  
! 		result = pnstrdup(buff, nbytes);
  
  		for (p = result; *p; p++)
  		{
***************
*** 1851,1857 ****
  				{
  					char	   *p = pstrdup(tmtcTzn(in));
  
! 					strcpy(s, str_tolower(p));
  					pfree(p);
  					s += strlen(s);
  				}
--- 1897,1903 ----
  				{
  					char	   *p = pstrdup(tmtcTzn(in));
  
! 					strcpy(s, str_tolower(p, strlen(p)));
  					pfree(p);
  					s += strlen(s);
  				}
***************
*** 1893,1903 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
  				else
  				{
  					strcpy(workbuff, months_full[tm->tm_mon - 1]);
! 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
  				}
  				s += strlen(s);
  				break;
--- 1939,1951 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
! 								strlen(localized_full_months[tm->tm_mon - 1])));
  				else
  				{
  					strcpy(workbuff, months_full[tm->tm_mon - 1]);
! 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
! 								str_toupper(workbuff, strlen(workbuff)));
  				}
  				s += strlen(s);
  				break;
***************
*** 1906,1912 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
  				else
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
  				s += strlen(s);
--- 1954,1961 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
! 								strlen(localized_full_months[tm->tm_mon - 1])));
  				else
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
  				s += strlen(s);
***************
*** 1916,1922 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
  				else
  				{
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
--- 1965,1972 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
! 								strlen(localized_full_months[tm->tm_mon - 1])));
  				else
  				{
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
***************
*** 1929,1937 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
  				else
! 					strcpy(s, str_toupper(months[tm->tm_mon - 1]));
  				s += strlen(s);
  				break;
  			case DCH_Mon:
--- 1979,1989 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
! 								strlen(localized_abbrev_months[tm->tm_mon - 1])));
  				else
! 					strcpy(s, str_toupper(months[tm->tm_mon - 1],
! 								strlen(months[tm->tm_mon - 1])));
  				s += strlen(s);
  				break;
  			case DCH_Mon:
***************
*** 1939,1945 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
  				else
  					strcpy(s, months[tm->tm_mon - 1]);
  				s += strlen(s);
--- 1991,1998 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
! 								strlen(localized_abbrev_months[tm->tm_mon - 1])));
  				else
  					strcpy(s, months[tm->tm_mon - 1]);
  				s += strlen(s);
***************
*** 1949,1955 ****
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
  				else
  				{
  					strcpy(s, months[tm->tm_mon - 1]);
--- 2002,2009 ----
  				if (!tm->tm_mon)
  					break;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
! 								strlen(localized_abbrev_months[tm->tm_mon - 1])));
  				else
  				{
  					strcpy(s, months[tm->tm_mon - 1]);
***************
*** 1966,1983 ****
  			case DCH_DAY:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
  				else
  				{
  					strcpy(workbuff, days[tm->tm_wday]);
! 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
  				}
  				s += strlen(s);
  				break;
  			case DCH_Day:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
  				else
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
  				s += strlen(s);
--- 2020,2040 ----
  			case DCH_DAY:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
! 								strlen(localized_full_days[tm->tm_wday])));
  				else
  				{
  					strcpy(workbuff, days[tm->tm_wday]);
! 					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
! 								str_toupper(workbuff, strlen(workbuff)));
  				}
  				s += strlen(s);
  				break;
  			case DCH_Day:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
! 								strlen(localized_full_days[tm->tm_wday])));
  				else
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
  				s += strlen(s);
***************
*** 1985,1991 ****
  			case DCH_day:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
  				else
  				{
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
--- 2042,2049 ----
  			case DCH_day:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
! 								strlen(localized_full_days[tm->tm_wday])));
  				else
  				{
  					sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
***************
*** 1996,2010 ****
  			case DCH_DY:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
  				else
! 					strcpy(s, str_toupper(days_short[tm->tm_wday]));
  				s += strlen(s);
  				break;
  			case DCH_Dy:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
  				else
  					strcpy(s, days_short[tm->tm_wday]);
  				s += strlen(s);
--- 2054,2071 ----
  			case DCH_DY:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
! 								strlen(localized_abbrev_days[tm->tm_wday])));
  				else
! 					strcpy(s, str_toupper(days_short[tm->tm_wday],
! 								strlen(days_short[tm->tm_wday])));
  				s += strlen(s);
  				break;
  			case DCH_Dy:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
! 								strlen(localized_abbrev_days[tm->tm_wday])));
  				else
  					strcpy(s, days_short[tm->tm_wday]);
  				s += strlen(s);
***************
*** 2012,2018 ****
  			case DCH_dy:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
  				else
  				{
  					strcpy(s, days_short[tm->tm_wday]);
--- 2073,2080 ----
  			case DCH_dy:
  				INVALID_FOR_INTERVAL;
  				if (S_TM(n->suffix))
! 					strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
! 								strlen(localized_abbrev_days[tm->tm_wday])));
  				else
  				{
  					strcpy(s, days_short[tm->tm_wday]);
***************
*** 4277,4288 ****
  				case NUM_rn:
  					if (IS_FILLMODE(Np->Num))
  					{
! 						strcpy(Np->inout_p, str_tolower(Np->number_p));
  						Np->inout_p += strlen(Np->inout_p) - 1;
  					}
  					else
  					{
! 						sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
  						Np->inout_p += strlen(Np->inout_p) - 1;
  					}
  					break;
--- 4339,4352 ----
  				case NUM_rn:
  					if (IS_FILLMODE(Np->Num))
  					{
! 						strcpy(Np->inout_p, str_tolower(Np->number_p,
! 								strlen(Np->number_p)));
  						Np->inout_p += strlen(Np->inout_p) - 1;
  					}
  					else
  					{
! 						sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
! 								strlen(Np->number_p)));
  						Np->inout_p += strlen(Np->inout_p) - 1;
  					}
  					break;
Index: src/backend/utils/adt/oracle_compat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v
retrieving revision 1.80
diff -c -c -r1.80 oracle_compat.c
*** src/backend/utils/adt/oracle_compat.c	17 Jun 2008 16:09:06 -0000	1.80
--- src/backend/utils/adt/oracle_compat.c	23 Jun 2008 19:24:35 -0000
***************
*** 29,320 ****
  #endif
  
  #include "utils/builtins.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"
  
  
- /*
-  * If the system provides the needed functions for wide-character manipulation
-  * (which are all standardized by C99), then we implement upper/lower/initcap
-  * using wide-character functions.	Otherwise we use the traditional <ctype.h>
-  * functions, which of course will not work as desired in multibyte character
-  * sets.  Note that in either case we are effectively assuming that the
-  * database character encoding matches the encoding implied by LC_CTYPE.
-  */
- #ifdef USE_WIDE_UPPER_LOWER
- char	   *wstring_lower(char *str);
- char	   *wstring_upper(char *str);
- wchar_t	   *texttowcs(const text *txt);
- text	   *wcstotext(const wchar_t *str, int ncodes);
- #endif
- 
  static text *dotrim(const char *string, int stringlen,
  	   const char *set, int setlen,
  	   bool doltrim, bool dortrim);
  
  
- #ifdef USE_WIDE_UPPER_LOWER
- 
- /*
-  * Convert a TEXT value into a palloc'd wchar string.
-  */
- wchar_t *
- texttowcs(const text *txt)
- {
- 	int			nbytes = VARSIZE_ANY_EXHDR(txt);
- 	char	   *workstr;
- 	wchar_t    *result;
- 	size_t		ncodes;
- 
- 	/* Overflow paranoia */
- 	if (nbytes < 0 ||
- 		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
- 		ereport(ERROR,
- 				(errcode(ERRCODE_OUT_OF_MEMORY),
- 				 errmsg("out of memory")));
- 
- 	/* Need a null-terminated version of the input */
- 	workstr = text_to_cstring(txt);
- 
- 	/* Output workspace cannot have more codes than input bytes */
- 	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- 
- 	/* Do the conversion */
- 	ncodes = mbstowcs(result, workstr, nbytes + 1);
- 
- 	if (ncodes == (size_t) -1)
- 	{
- 		/*
- 		 * Invalid multibyte character encountered.  We try to give a useful
- 		 * error message by letting pg_verifymbstr check the string.  But it's
- 		 * possible that the string is OK to us, and not OK to mbstowcs ---
- 		 * this suggests that the LC_CTYPE locale is different from the
- 		 * database encoding.  Give a generic error message if verifymbstr
- 		 * can't find anything wrong.
- 		 */
- 		pg_verifymbstr(workstr, nbytes, false);
- 		ereport(ERROR,
- 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- 				 errmsg("invalid multibyte character for locale"),
- 				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- 	}
- 
- 	Assert(ncodes <= (size_t) nbytes);
- 
- 	return result;
- }
- 
- 
- /*
-  * Convert a wchar string into a palloc'd TEXT value.  The wchar string
-  * must be zero-terminated, but we also require the caller to pass the string
-  * length, since it will know it anyway in current uses.
-  */
- text *
- wcstotext(const wchar_t *str, int ncodes)
- {
- 	text	   *result;
- 	size_t		nbytes;
- 
- 	/* Overflow paranoia */
- 	if (ncodes < 0 ||
- 		ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
- 		ereport(ERROR,
- 				(errcode(ERRCODE_OUT_OF_MEMORY),
- 				 errmsg("out of memory")));
- 
- 	/* Make workspace certainly large enough for result */
- 	result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
- 
- 	/* Do the conversion */
- 	nbytes = wcstombs((char *) VARDATA(result), str,
- 					  (ncodes + 1) * MB_CUR_MAX);
- 
- 	if (nbytes == (size_t) -1)
- 	{
- 		/* Invalid multibyte character encountered ... shouldn't happen */
- 		ereport(ERROR,
- 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- 				 errmsg("invalid multibyte character for locale")));
- 	}
- 
- 	Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
- 
- 	SET_VARSIZE(result, nbytes + VARHDRSZ);
- 
- 	return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
- 
- 
- /*
-  * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
-  * To make use of the upper/lower functionality, we need to map UTF8 to
-  * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
-  * This conversion layer takes care of it.
-  */
- 
- #ifdef WIN32
- 
- /* texttowcs for the case of UTF8 to UTF16 */
- static wchar_t *
- win32_utf8_texttowcs(const text *txt)
- {
- 	int			nbytes = VARSIZE_ANY_EXHDR(txt);
- 	wchar_t    *result;
- 	int			r;
- 
- 	/* Overflow paranoia */
- 	if (nbytes < 0 ||
- 		nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
- 		ereport(ERROR,
- 				(errcode(ERRCODE_OUT_OF_MEMORY),
- 				 errmsg("out of memory")));
- 
- 	/* Output workspace cannot have more codes than input bytes */
- 	result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
- 
- 	/* stupid Microsloth API does not work for zero-length input */
- 	if (nbytes == 0)
- 		r = 0;
- 	else
- 	{
- 		/* Do the conversion */
- 		r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
- 								result, nbytes);
- 
- 		if (r <= 0)				/* assume it's NO_UNICODE_TRANSLATION */
- 		{
- 			/* see notes above about error reporting */
- 			pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
- 			ereport(ERROR,
- 					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- 					 errmsg("invalid multibyte character for locale"),
- 					 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
- 		}
- 	}
- 
- 	/* Append trailing null wchar (MultiByteToWideChar won't have) */
- 	Assert(r <= nbytes);
- 	result[r] = 0;
- 
- 	return result;
- }
- 
- /* wcstotext for the case of UTF16 to UTF8 */
- static text *
- win32_utf8_wcstotext(const wchar_t *str)
- {
- 	text	   *result;
- 	int			nbytes;
- 	int			r;
- 
- 	/* Compute size of output string (this *will* include trailing null) */
- 	nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
- 	if (nbytes <= 0)			/* shouldn't happen */
- 		ereport(ERROR,
- 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- 				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
- 						GetLastError())));
- 
- 	result = palloc(nbytes + VARHDRSZ);
- 
- 	r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
- 							NULL, NULL);
- 	if (r != nbytes)			/* shouldn't happen */
- 		ereport(ERROR,
- 				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
- 				 errmsg("UTF-16 to UTF-8 translation failed: %lu",
- 						GetLastError())));
- 
- 	SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
- 
- 	return result;
- }
- 
- /* interface layer to check which encoding is in use */
- 
- static wchar_t *
- win32_texttowcs(const text *txt)
- {
- 	if (GetDatabaseEncoding() == PG_UTF8)
- 		return win32_utf8_texttowcs(txt);
- 	else
- 		return texttowcs(txt);
- }
- 
- static text *
- win32_wcstotext(const wchar_t *str, int ncodes)
- {
- 	if (GetDatabaseEncoding() == PG_UTF8)
- 		return win32_utf8_wcstotext(str);
- 	else
- 		return wcstotext(str, ncodes);
- }
- 
- /* use macros to cause routines below to call interface layer */
- 
- #define texttowcs	win32_texttowcs
- #define wcstotext	win32_wcstotext
- #endif   /* WIN32 */
- 
- #ifdef USE_WIDE_UPPER_LOWER
- /*
-  * string_upper and string_lower are used for correct multibyte upper/lower
-  * transformations localized strings. Returns pointers to transformated
-  * string.
-  */
- char *
- wstring_upper(char *str)
- {
- 	wchar_t    *workspace;
- 	text	   *in_text;
- 	text	   *out_text;
- 	char	   *result;
- 	int			i;
- 
- 	in_text = cstring_to_text(str);
- 	workspace = texttowcs(in_text);
- 
- 	for (i = 0; workspace[i] != 0; i++)
- 		workspace[i] = towupper(workspace[i]);
- 
- 	out_text = wcstotext(workspace, i);
- 	result = text_to_cstring(out_text);
- 
- 	pfree(workspace);
- 	pfree(in_text);
- 	pfree(out_text);
- 
- 	return result;
- }
- 
- char *
- wstring_lower(char *str)
- {
- 	wchar_t    *workspace;
- 	text	   *in_text;
- 	text	   *out_text;
- 	char	   *result;
- 	int			i;
- 
- 	in_text = cstring_to_text(str);
- 	workspace = texttowcs(in_text);
- 
- 	for (i = 0; workspace[i] != 0; i++)
- 		workspace[i] = towlower(workspace[i]);
- 
- 	out_text = wcstotext(workspace, i);
- 	result = text_to_cstring(out_text);
- 
- 	pfree(workspace);
- 	pfree(in_text);
- 	pfree(out_text);
- 
- 	return result;
- }
- #endif   /* USE_WIDE_UPPER_LOWER */
- 
  /********************************************************************
   *
   * lower
--- 29,44 ----
  #endif
  
  #include "utils/builtins.h"
+ #include "utils/formatting.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"
  
  
  static text *dotrim(const char *string, int stringlen,
  	   const char *set, int setlen,
  	   bool doltrim, bool dortrim);
  
  
  /********************************************************************
   *
   * lower
***************
*** 332,383 ****
  Datum
  lower(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
! 
! 	/*
! 	 * Use wide char code only when max encoding length > 1 and ctype != C.
! 	 * Some operating systems fail with multi-byte encodings and a C locale.
! 	 * Also, for a C locale there is no need to process as multibyte.
! 	 */
! 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		text	   *string = PG_GETARG_TEXT_PP(0);
! 		text	   *result;
! 		wchar_t    *workspace;
! 		int			i;
! 
! 		workspace = texttowcs(string);
! 
! 		for (i = 0; workspace[i] != 0; i++)
! 			workspace[i] = towlower(workspace[i]);
  
! 		result = wcstotext(workspace, i);
! 
! 		pfree(workspace);
! 
! 		PG_RETURN_TEXT_P(result);
! 	}
! 	else
! #endif   /* USE_WIDE_UPPER_LOWER */
! 	{
! 		text	   *string = PG_GETARG_TEXT_P_COPY(0);
! 		char	   *ptr;
! 		int			m;
! 
! 		/*
! 		 * Since we copied the string, we can scribble directly on the value
! 		 */
! 		ptr = VARDATA(string);
! 		m = VARSIZE(string) - VARHDRSZ;
! 
! 		while (m-- > 0)
! 		{
! 			*ptr = tolower((unsigned char) *ptr);
! 			ptr++;
! 		}
! 
! 		PG_RETURN_TEXT_P(string);
! 	}
  }
  
  
--- 56,70 ----
  Datum
  lower(PG_FUNCTION_ARGS)
  {
! 	text	*in_string = PG_GETARG_TEXT_PP(0);
! 	char	*out_string;
! 	text	*result;
! 
! 	out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! 	result = cstring_to_text(out_string);
! 	pfree(out_string);
  
! 	PG_RETURN_TEXT_P(result);
  }
  
  
***************
*** 398,449 ****
  Datum
  upper(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
  
! 	/*
! 	 * Use wide char code only when max encoding length > 1 and ctype != C.
! 	 * Some operating systems fail with multi-byte encodings and a C locale.
! 	 * Also, for a C locale there is no need to process as multibyte.
! 	 */
! 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		text	   *string = PG_GETARG_TEXT_PP(0);
! 		text	   *result;
! 		wchar_t    *workspace;
! 		int			i;
! 
! 		workspace = texttowcs(string);
! 
! 		for (i = 0; workspace[i] != 0; i++)
! 			workspace[i] = towupper(workspace[i]);
! 
! 		result = wcstotext(workspace, i);
! 
! 		pfree(workspace);
! 
! 		PG_RETURN_TEXT_P(result);
! 	}
! 	else
! #endif   /* USE_WIDE_UPPER_LOWER */
! 	{
! 		text	   *string = PG_GETARG_TEXT_P_COPY(0);
! 		char	   *ptr;
! 		int			m;
! 
! 		/*
! 		 * Since we copied the string, we can scribble directly on the value
! 		 */
! 		ptr = VARDATA(string);
! 		m = VARSIZE(string) - VARHDRSZ;
! 
! 		while (m-- > 0)
! 		{
! 			*ptr = toupper((unsigned char) *ptr);
! 			ptr++;
! 		}
! 
! 		PG_RETURN_TEXT_P(string);
! 	}
  }
  
  
--- 85,99 ----
  Datum
  upper(PG_FUNCTION_ARGS)
  {
! 	text	*in_string = PG_GETARG_TEXT_PP(0);
! 	char	*out_string;
! 	text	*result;
! 
! 	out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! 	result = cstring_to_text(out_string);
! 	pfree(out_string);
  
! 	PG_RETURN_TEXT_P(result);
  }
  
  
***************
*** 467,530 ****
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! #ifdef USE_WIDE_UPPER_LOWER
  
! 	/*
! 	 * Use wide char code only when max encoding length > 1 and ctype != C.
! 	 * Some operating systems fail with multi-byte encodings and a C locale.
! 	 * Also, for a C locale there is no need to process as multibyte.
! 	 */
! 	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
! 	{
! 		text	   *string = PG_GETARG_TEXT_PP(0);
! 		text	   *result;
! 		wchar_t    *workspace;
! 		int			wasalnum = 0;
! 		int			i;
! 
! 		workspace = texttowcs(string);
! 
! 		for (i = 0; workspace[i] != 0; i++)
! 		{
! 			if (wasalnum)
! 				workspace[i] = towlower(workspace[i]);
! 			else
! 				workspace[i] = towupper(workspace[i]);
! 			wasalnum = iswalnum(workspace[i]);
! 		}
! 
! 		result = wcstotext(workspace, i);
! 
! 		pfree(workspace);
! 
! 		PG_RETURN_TEXT_P(result);
! 	}
! 	else
! #endif   /* USE_WIDE_UPPER_LOWER */
! 	{
! 		text	   *string = PG_GETARG_TEXT_P_COPY(0);
! 		int			wasalnum = 0;
! 		char	   *ptr;
! 		int			m;
! 
! 		/*
! 		 * Since we copied the string, we can scribble directly on the value
! 		 */
! 		ptr = VARDATA(string);
! 		m = VARSIZE(string) - VARHDRSZ;
! 
! 		while (m-- > 0)
! 		{
! 			if (wasalnum)
! 				*ptr = tolower((unsigned char) *ptr);
! 			else
! 				*ptr = toupper((unsigned char) *ptr);
! 			wasalnum = isalnum((unsigned char) *ptr);
! 			ptr++;
! 		}
! 
! 		PG_RETURN_TEXT_P(string);
! 	}
  }
  
  
--- 117,131 ----
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
! 	text	*in_string = PG_GETARG_TEXT_PP(0);
! 	char	*out_string;
! 	text	*result;
! 
! 	out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
! 	result = cstring_to_text(out_string);
! 	pfree(out_string);
  
! 	PG_RETURN_TEXT_P(result);
  }
  
  
Index: src/include/utils/formatting.h
===================================================================
RCS file: /cvsroot/pgsql/src/include/utils/formatting.h,v
retrieving revision 1.18
diff -c -c -r1.18 formatting.h
*** src/include/utils/formatting.h	1 Jan 2008 19:45:59 -0000	1.18
--- src/include/utils/formatting.h	23 Jun 2008 19:24:36 -0000
***************
*** 21,26 ****
--- 21,30 ----
  #include "fmgr.h"
  
  
+ extern char *str_tolower(char *buff, size_t nbytes);
+ extern char *str_toupper(char *buff, size_t nbytes);
+ extern char *str_initcap(char *buff, size_t nbytes);
+ 
  extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
  extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
  extern Datum interval_to_char(PG_FUNCTION_ARGS);

-- 
Sent via pgsql-patches mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-patches

Re: [PATCHES] Simplify formatting.c

Reply via email to