Re: [HACKERS] NaN support in NUMERIC data type

Sam Mason Wed, 08 Apr 2009 04:31:33 -0700

On Tue, Apr 07, 2009 at 12:51:21PM -0400, Tom Lane wrote:
> Sam Mason <[email protected]> writes:
> >   SELECT 'NaN'::float8;
> >   SELECT ' NaN'::float8;
> >   SELECT 'NaN '::float8;
> >   SELECT '+NaN'::float8;
> >   SELECT '-NaN'::float8;
> 
> Well, the +- part must be an artifact of your strtod() implementation;
> our own code isn't doing anything to accept that.  I think it's pretty
> bogus --- NaNs do not have signs.


OK, didn't actually check the code with that; no point worrying about it
then.

> IIRC, the explicit support for leading/trailing spaces is something that
> we added in float8in long after numeric_in was written, and I think just
> nobody thought about numeric at the time.  But it's clearly inconsistent
> to allow spaces around a regular value but not a NaN.

Good, I wanted to make sure it wasn't a deliberate thing before doing
too much.

> Possibly the logic for leading/trailing spaces could be pulled out
> of set_var_from_str and executed in numeric_in instead?

Yes, I didn't want to do this before because it's called from a
couple of other places.  I looked again and realised that we're
generating those in very fixed formats so don't need to worry about
leading/trailing spaces and hence can move the code up to numeric_in.

The attached patch gives set_var_from_str an "endptr" similar to strtod
so handling is closer to the float[48]in code.  I moved error reporting
code outside as well to cut down on the multiple identical calls.

The included patch was generated against 8.3.5 (because that's what I
had lying around when I started playing) but applies with a little fuzz
to the latest snapshot and does the right thing for me in both versions.

-- 
  Sam  http://samason.me.uk/

*** src/backend/utils/adt/numeric.c	2009-04-07 16:49:08.000000000 +0100
--- src/backend/utils/adt/numeric.c	2009-04-08 11:53:13.000000000 +0100
***************
*** 238,244 ****
  static void free_var(NumericVar *var);
  static void zero_var(NumericVar *var);
  
! static void set_var_from_str(const char *str, NumericVar *dest);
  static void set_var_from_num(Numeric value, NumericVar *dest);
  static void set_var_from_var(NumericVar *value, NumericVar *dest);
  static char *get_str_from_var(NumericVar *var, int dscale);
--- 238,244 ----
  static void free_var(NumericVar *var);
  static void zero_var(NumericVar *var);
  
! static void set_var_from_str(const char *str, const char ** endptr, NumericVar *dest);
  static void set_var_from_num(Numeric value, NumericVar *dest);
  static void set_var_from_var(NumericVar *value, NumericVar *dest);
  static char *get_str_from_var(NumericVar *var, int dscale);
***************
*** 310,315 ****
--- 310,317 ----
  numeric_in(PG_FUNCTION_ARGS)
  {
  	char	   *str = PG_GETARG_CSTRING(0);
+ 	char	   *orig_str;
+ 	const char *endptr;
  
  #ifdef NOT_USED
  	Oid			typelem = PG_GETARG_OID(1);
***************
*** 318,335 ****
  	NumericVar	value;
  	Numeric		res;
  
! 	/*
! 	 * Check for NaN
  	 */
! 	if (pg_strcasecmp(str, "NaN") == 0)
! 		PG_RETURN_NUMERIC(make_result(&const_nan));
  
  	/*
  	 * Use set_var_from_str() to parse the input string and return it in the
  	 * packed DB storage format
  	 */
  	init_var(&value);
! 	set_var_from_str(str, &value);
  
  	apply_typmod(&value, typmod);
  
--- 320,369 ----
  	NumericVar	value;
  	Numeric		res;
  
! 	/* 
! 	 * To allow us to generate sensible error messages we tuck the
! 	 * original start of the string away so we can use it later.
  	 */
! 	orig_str = str;
! 
! 	/* skip leading spaces */
! 	while (isspace((unsigned char) *str))
! 		str++;
  
  	/*
  	 * Use set_var_from_str() to parse the input string and return it in the
  	 * packed DB storage format
  	 */
  	init_var(&value);
! 	set_var_from_str(str, &endptr, &value);
! 
! 	/*
! 	 * we didn't see anything that looked like a numeric value
! 	 */
! 	if (str == endptr) {
! 		/*
! 		 * Check for NaN
! 		 */
! 		if (pg_strncasecmp(str, "NaN", 3) == 0) {
! 			value = const_nan;
! 			endptr = str + 3;
! 		} else
! 			ereport(ERROR,
! 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 					 errmsg("invalid input syntax for type numeric: \"%s\"",
! 							orig_str)));
! 	}
! 
! 	/* skip trailing spaces */
! 	while (isspace((unsigned char) *endptr))
! 		endptr++;
! 
! 	if (*endptr != '\0') {
! 		ereport(ERROR,
! 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 				 errmsg("invalid input syntax for type numeric: \"%s\"",
! 						orig_str)));
! 	}
  
  	apply_typmod(&value, typmod);
  
***************
*** 2056,2061 ****
--- 2090,2096 ----
  	Numeric		res;
  	NumericVar	result;
  	char		buf[DBL_DIG + 100];
+ 	const char *endptr;
  
  	if (isnan(val))
  		PG_RETURN_NUMERIC(make_result(&const_nan));
***************
*** 2064,2070 ****
  
  	init_var(&result);
  
! 	set_var_from_str(buf, &result);
  	res = make_result(&result);
  
  	free_var(&result);
--- 2099,2111 ----
  
  	init_var(&result);
  
! 	set_var_from_str(buf, &endptr, &result);
! 	if (endptr == buf) {
! 		ereport(ERROR,
! 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 				 errmsg("invalid input syntax for type numeric: \"%s\"",
! 						buf)));
! 	}
  	res = make_result(&result);
  
  	free_var(&result);
***************
*** 2116,2121 ****
--- 2157,2163 ----
  	Numeric		res;
  	NumericVar	result;
  	char		buf[FLT_DIG + 100];
+ 	const char *endptr;
  
  	if (isnan(val))
  		PG_RETURN_NUMERIC(make_result(&const_nan));
***************
*** 2124,2130 ****
  
  	init_var(&result);
  
! 	set_var_from_str(buf, &result);
  	res = make_result(&result);
  
  	free_var(&result);
--- 2166,2178 ----
  
  	init_var(&result);
  
! 	set_var_from_str(buf, &endptr, &result);
! 	if (endptr == buf) {
! 		ereport(ERROR,
! 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 				 errmsg("invalid input syntax for type numeric: \"%s\"",
! 						buf)));
! 	}
  	res = make_result(&result);
  
  	free_var(&result);
***************
*** 2901,2907 ****
   *	Parse a string and put the number into a variable
   */
  static void
! set_var_from_str(const char *str, NumericVar *dest)
  {
  	const char *cp = str;
  	bool		have_dp = FALSE;
--- 2949,2955 ----
   *	Parse a string and put the number into a variable
   */
  static void
! set_var_from_str(const char *str, const char **endptr, NumericVar *dest)
  {
  	const char *cp = str;
  	bool		have_dp = FALSE;
***************
*** 2916,2934 ****
  	int			offset;
  	NumericDigit *digits;
  
  	/*
  	 * We first parse the string to extract decimal digits and determine the
  	 * correct decimal weight.	Then convert to NBASE representation.
  	 */
  
- 	/* skip leading spaces */
- 	while (*cp)
- 	{
- 		if (!isspace((unsigned char) *cp))
- 			break;
- 		cp++;
- 	}
- 
  	switch (*cp)
  	{
  		case '+':
--- 2964,2977 ----
  	int			offset;
  	NumericDigit *digits;
  
+ 	/* record where we started so calling code can generate errors appropriately */
+ 	*endptr = str;
+ 
  	/*
  	 * We first parse the string to extract decimal digits and determine the
  	 * correct decimal weight.	Then convert to NBASE representation.
  	 */
  
  	switch (*cp)
  	{
  		case '+':
***************
*** 2949,2957 ****
  	}
  
  	if (!isdigit((unsigned char) *cp))
! 		ereport(ERROR,
! 				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 			  errmsg("invalid input syntax for type numeric: \"%s\"", str)));
  
  	decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
  
--- 2992,2998 ----
  	}
  
  	if (!isdigit((unsigned char) *cp))
! 		return;
  
  	decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
  
***************
*** 2972,2981 ****
  		else if (*cp == '.')
  		{
  			if (have_dp)
! 				ereport(ERROR,
! 						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 					  errmsg("invalid input syntax for type numeric: \"%s\"",
! 							 str)));
  			have_dp = TRUE;
  			cp++;
  		}
--- 3013,3019 ----
  		else if (*cp == '.')
  		{
  			if (have_dp)
! 				return;
  			have_dp = TRUE;
  			cp++;
  		}
***************
*** 2996,3028 ****
  		cp++;
  		exponent = strtol(cp, &endptr, 10);
  		if (endptr == cp)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 					 errmsg("invalid input syntax for type numeric: \"%s\"",
! 							str)));
  		cp = endptr;
  		if (exponent > NUMERIC_MAX_PRECISION ||
  			exponent < -NUMERIC_MAX_PRECISION)
! 			ereport(ERROR,
! 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 					 errmsg("invalid input syntax for type numeric: \"%s\"",
! 							str)));
  		dweight += (int) exponent;
  		dscale -= (int) exponent;
  		if (dscale < 0)
  			dscale = 0;
  	}
  
! 	/* Should be nothing left but spaces */
! 	while (*cp)
! 	{
! 		if (!isspace((unsigned char) *cp))
! 			ereport(ERROR,
! 					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
! 					 errmsg("invalid input syntax for type numeric: \"%s\"",
! 							str)));
! 		cp++;
! 	}
  
  	/*
  	 * Okay, convert pure-decimal representation to base NBASE.  First we need
--- 3034,3053 ----
  		cp++;
  		exponent = strtol(cp, &endptr, 10);
  		if (endptr == cp)
! 			return;
! 
  		cp = endptr;
  		if (exponent > NUMERIC_MAX_PRECISION ||
  			exponent < -NUMERIC_MAX_PRECISION)
! 			return;
  		dweight += (int) exponent;
  		dscale -= (int) exponent;
  		if (dscale < 0)
  			dscale = 0;
  	}
  
! 	/* record where the end of the number was for the calling code */
! 	*endptr = cp;
  
  	/*
  	 * Okay, convert pure-decimal representation to base NBASE.  First we need

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] NaN support in NUMERIC data type

Reply via email to