Re: Non-decimal integer literals

Dean Rasheed Fri, 13 Jan 2023 02:01:47 -0800

On Wed, 14 Dec 2022 at 05:47, Peter Eisentraut
<peter.eisentr...@enterprisedb.com> wrote:
>
> committed


Now that we have this for integer types, I think it's worth doing for
numeric as well, since the parser will now pass such things through to
numeric_in() when they don't fit in an int64, and it seems plausible
that at least some people might use non-decimal integers beyond
INT64MIN/MAX. Also, without such support in numeric_in(), the feature
looks a little incomplete:

SELECT -0x8000000000000000;
       ?column?
----------------------
 -9223372036854775808
(1 row)

SELECT 0x8000000000000000;
ERROR:  invalid input syntax for type numeric: "0x8000000000000000"
LINE 1: select 0x8000000000000000;
               ^

One concern I had was what the performance would be like. I don't
really expect people to pass in the kinds of truly huge values that
numeric supports, but it can't be ruled out. So I gave it a go, to see
how hard it would be, and what the worst-case performance looks like.
(I included underscore-handling too, so that I could measure that at
the same time.)

The base-conversion algorithm is O(N^2), and the worst case before
overflow is with hex strings with around 108,000 digits, oct strings
with around 145,000 digits, or binary strings with around 435,000
digits. Each of those takes around 400ms to parse on my machine.
That's around the level at which I might consider adding
CHECK_FOR_INTERRUPTS()'s, but I think that it's probably not worth it,
given how unrealistic such huge inputs are in practice.

The other important thing is that this shouldn't impact the
performance when parsing regular decimal inputs. The bulk of the
non-decimal integer parsing is handled by a separate function, which
is called directly from numeric_in(), since non-decimal handling isn't
required at the set_var_from_str() level (used by the float4/8 ->
numeric conversion functions). I also re-arranged the numeric_in()
code somewhat, and was able to make substantial savings by reducing
the number of pg_strncasecmp() calls, and avoiding those calls
entirely for regular numbers that aren't NaN or Inf. Testing that with
COPY with a few million numbers of different sizes, I observed a
10-15% performance increase.

So I'm feeling quite good about the end result -- I set out hoping not
to make performance noticeably worse, but ended up making it
significantly better.

Regards,
Dean

From f129bcdaeaaa62d8ddaf6a8e6441183f46097687 Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rash...@gmail.com>
Date: Fri, 13 Jan 2023 09:20:17 +0000
Subject: [PATCH 1/2] Add non-decimal integer support to type numeric.

This enhances the numeric type input function, adding support for
hexadecimal, octal, and binary integers of any size, up to the limits
of the numeric type.

Since 6fcda9aba8, such non-decimal integers have been accepted by the
parser as integer literals and passed through to numeric_in(). This
commit gives numeric_in() the ability to handle them.

While at it, simplify the handling of NaN and infinities, reducing the
number of calls to pg_strncasecmp(), and arrange for pg_strncasecmp()
to not be called at all for regular numbers. This gives a significant
performance improvement for decimal inputs, more than offsetting the
small performance hit of checking for non-decimal input.
---
 src/backend/utils/adt/numeric.c          | 355 +++++++++++++++++++----
 src/test/regress/expected/numeric.out    |  62 +++-
 src/test/regress/expected/numerology.out |  48 +--
 src/test/regress/sql/numeric.sql         |  10 +
 4 files changed, 380 insertions(+), 95 deletions(-)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index a6409ecbee..ed592841dc 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -500,6 +500,11 @@ static void zero_var(NumericVar *var);
 static bool set_var_from_str(const char *str, const char *cp,
 							 NumericVar *dest, const char **endptr,
 							 Node *escontext);
+static bool set_var_from_non_decimal_integer_str(const char *str,
+												 const char *cp, int sign,
+												 int base, NumericVar *dest,
+												 const char **endptr,
+												 Node *escontext);
 static void set_var_from_num(Numeric num, NumericVar *dest);
 static void init_var_from_num(Numeric num, NumericVar *dest);
 static void set_var_from_var(const NumericVar *value, NumericVar *dest);
@@ -625,6 +630,8 @@ numeric_in(PG_FUNCTION_ARGS)
 	Node	   *escontext = fcinfo->context;
 	Numeric		res;
 	const char *cp;
+	const char *numstart;
+	int			sign;
 
 	/* Skip leading spaces */
 	cp = str;
@@ -636,70 +643,130 @@ numeric_in(PG_FUNCTION_ARGS)
 	}
 
 	/*
-	 * Check for NaN and infinities.  We recognize the same strings allowed by
-	 * float8in().
+	 * Process the number's sign. This duplicates logic in set_var_from_str(),
+	 * but it's worth doing here, since it simplifies the handling of
+	 * infinities and non-decimal integers.
 	 */
-	if (pg_strncasecmp(cp, "NaN", 3) == 0)
-	{
-		res = make_result(&const_nan);
-		cp += 3;
-	}
-	else if (pg_strncasecmp(cp, "Infinity", 8) == 0)
-	{
-		res = make_result(&const_pinf);
-		cp += 8;
-	}
-	else if (pg_strncasecmp(cp, "+Infinity", 9) == 0)
-	{
-		res = make_result(&const_pinf);
-		cp += 9;
-	}
-	else if (pg_strncasecmp(cp, "-Infinity", 9) == 0)
-	{
-		res = make_result(&const_ninf);
-		cp += 9;
-	}
-	else if (pg_strncasecmp(cp, "inf", 3) == 0)
-	{
-		res = make_result(&const_pinf);
-		cp += 3;
-	}
-	else if (pg_strncasecmp(cp, "+inf", 4) == 0)
+	numstart = cp;
+	sign = NUMERIC_POS;
+
+	if (*cp == '+')
+		cp++;
+	else if (*cp == '-')
 	{
-		res = make_result(&const_pinf);
-		cp += 4;
+		sign = NUMERIC_NEG;
+		cp++;
 	}
-	else if (pg_strncasecmp(cp, "-inf", 4) == 0)
+
+	/*
+	 * Check for NaN and infinities.  We recognize the same strings allowed by
+	 * float8in().
+	 *
+	 * Since all other legal inputs have a digit or a decimal point after the
+	 * sign, we need only check for NaN/infinity if that's not the case.
+	 */
+	if (!isdigit((unsigned char) *cp) && *cp != '.')
 	{
-		res = make_result(&const_ninf);
-		cp += 4;
+		/*
+		 * The number must be NaN or infinity; anything else can only be a
+		 * syntax error. Note that NaN mustn't have a sign.
+		 */
+		if (pg_strncasecmp(numstart, "NaN", 3) == 0)
+		{
+			res = make_result(&const_nan);
+			cp = numstart + 3;
+		}
+		else if (pg_strncasecmp(cp, "Infinity", 8) == 0)
+		{
+			res = make_result(sign == NUMERIC_POS ? &const_pinf : &const_ninf);
+			cp += 8;
+		}
+		else if (pg_strncasecmp(cp, "inf", 3) == 0)
+		{
+			res = make_result(sign == NUMERIC_POS ? &const_pinf : &const_ninf);
+			cp += 3;
+		}
+		else
+			goto invalid_syntax;
+
+		/*
+		 * Check for trailing junk; there should be nothing left but spaces.
+		 *
+		 * We intentionally do this check before applying the typmod because
+		 * we would like to throw any trailing-junk syntax error before any
+		 * semantic error resulting from apply_typmod_special().
+		 */
+		while (*cp)
+		{
+			if (!isspace((unsigned char) *cp))
+				goto invalid_syntax;
+			cp++;
+		}
+
+		if (!apply_typmod_special(res, typmod, escontext))
+			PG_RETURN_NULL();
 	}
 	else
 	{
 		/*
-		 * Use set_var_from_str() to parse a normal numeric value
+		 * We have a normal numeric value, which may be a non-decimal integer
+		 * or a regular decimal number.
 		 */
 		NumericVar	value;
+		int			base;
 		bool		have_error;
 
 		init_var(&value);
 
-		if (!set_var_from_str(str, cp, &value, &cp, escontext))
-			PG_RETURN_NULL();
+		/*
+		 * Determine the number's base by looking for a non-decimal prefix
+		 * indicator ("0x", "0o", or "0b").
+		 */
+		if (cp[0] == '0')
+		{
+			switch (cp[1])
+			{
+				case 'x':
+				case 'X':
+					base = 16;
+					break;
+				case 'o':
+				case 'O':
+					base = 8;
+					break;
+				case 'b':
+				case 'B':
+					base = 2;
+					break;
+				default:
+					base = 10;
+			}
+		}
+		else
+			base = 10;
+
+		/* Parse the rest of the number and apply the sign */
+		if (base == 10)
+		{
+			if (!set_var_from_str(str, cp, &value, &cp, escontext))
+				PG_RETURN_NULL();
+			value.sign = sign;
+		}
+		else
+		{
+			if (!set_var_from_non_decimal_integer_str(str, cp + 2, sign, base,
+													  &value, &cp, escontext))
+				PG_RETURN_NULL();
+		}
 
 		/*
-		 * We duplicate a few lines of code here because we would like to
-		 * throw any trailing-junk syntax error before any semantic error
-		 * resulting from apply_typmod.  We can't easily fold the two cases
-		 * together because we mustn't apply apply_typmod to a NaN/Inf.
+		 * Should be nothing left but spaces. As above, throw any typmod error
+		 * after finishing syntax check.
 		 */
 		while (*cp)
 		{
 			if (!isspace((unsigned char) *cp))
-				ereturn(escontext, (Datum) 0,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("invalid input syntax for type %s: \"%s\"",
-								"numeric", str)));
+				goto invalid_syntax;
 			cp++;
 		}
 
@@ -714,26 +781,15 @@ numeric_in(PG_FUNCTION_ARGS)
 					 errmsg("value overflows numeric format")));
 
 		free_var(&value);
-
-		PG_RETURN_NUMERIC(res);
-	}
-
-	/* Should be nothing left but spaces */
-	while (*cp)
-	{
-		if (!isspace((unsigned char) *cp))
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type %s: \"%s\"",
-							"numeric", str)));
-		cp++;
 	}
 
-	/* As above, throw any typmod error after finishing syntax check */
-	if (!apply_typmod_special(res, typmod, escontext))
-		PG_RETURN_NULL();
-
 	PG_RETURN_NUMERIC(res);
+
+invalid_syntax:
+	ereturn(escontext, (Datum) 0,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for type %s: \"%s\"",
+					"numeric", str)));
 }
 
 
@@ -6988,6 +7044,183 @@ set_var_from_str(const char *str, const char *cp,
 }
 
 
+/*
+ * Return the numeric value of a single hex digit.
+ */
+static inline int
+xdigit_value(char dig)
+{
+	return dig >= '0' && dig <= '9' ? dig - '0' :
+		dig >= 'a' && dig <= 'f' ? dig - 'a' + 10 :
+		dig >= 'A' && dig <= 'F' ? dig - 'A' + 10 : -1;
+}
+
+/*
+ * set_var_from_non_decimal_integer_str()
+ *
+ *	Parse a string containing a non-decimal integer
+ *
+ * This function does not handle leading or trailing spaces.  It returns
+ * the end+1 position parsed into *endptr, so that caller can check for
+ * trailing spaces/garbage if deemed necessary.
+ *
+ * cp is the place to actually start parsing; str is what to use in error
+ * reports.  The number's sign and base prefix indicator (e.g., "0x") are
+ * assumed to have already been parsed, so cp should point to the number's
+ * first digit in the base specified.
+ *
+ * base is expected to be 2, 8 or 16.
+ *
+ * Returns true on success, false on failure (if escontext points to an
+ * ErrorSaveContext; otherwise errors are thrown).
+ */
+static bool
+set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
+									 int base, NumericVar *dest,
+									 const char **endptr, Node *escontext)
+{
+	const char *firstdigit = cp;
+	int64		tmp;
+	int64		mul;
+	NumericVar	tmp_var;
+
+	init_var(&tmp_var);
+
+	zero_var(dest);
+
+	/* Process digits in groups that fit in an int64 */
+	tmp = 0;
+	mul = 1;
+
+	if (base == 16)
+	{
+		while (*cp)
+		{
+			if (isxdigit((unsigned char) *cp))
+			{
+				if (mul > PG_INT64_MAX / 16)
+				{
+					/* Add the contribution from this group of digits */
+					int64_to_numericvar(mul, &tmp_var);
+					mul_var(dest, &tmp_var, dest, 0);
+					int64_to_numericvar(tmp, &tmp_var);
+					add_var(dest, &tmp_var, dest);
+
+					/* Result will overflow if weight overflows int16 */
+					if (dest->weight > SHRT_MAX)
+						goto out_of_range;
+
+					/* Begin a new group */
+					tmp = 0;
+					mul = 1;
+				}
+
+				tmp = tmp * 16 + xdigit_value(*cp++);
+				mul = mul * 16;
+			}
+			else
+				break;
+		}
+	}
+	else if (base == 8)
+	{
+		while (*cp)
+		{
+			if (*cp >= '0' && *cp <= '7')
+			{
+				if (mul > PG_INT64_MAX / 8)
+				{
+					/* Add the contribution from this group of digits */
+					int64_to_numericvar(mul, &tmp_var);
+					mul_var(dest, &tmp_var, dest, 0);
+					int64_to_numericvar(tmp, &tmp_var);
+					add_var(dest, &tmp_var, dest);
+
+					/* Result will overflow if weight overflows int16 */
+					if (dest->weight > SHRT_MAX)
+						goto out_of_range;
+
+					/* Begin a new group */
+					tmp = 0;
+					mul = 1;
+				}
+
+				tmp = tmp * 8 + (*cp++ - '0');
+				mul = mul * 8;
+			}
+			else
+				break;
+		}
+	}
+	else if (base == 2)
+	{
+		while (*cp)
+		{
+			if (*cp >= '0' && *cp <= '1')
+			{
+				if (mul > PG_INT64_MAX / 2)
+				{
+					/* Add the contribution from this group of digits */
+					int64_to_numericvar(mul, &tmp_var);
+					mul_var(dest, &tmp_var, dest, 0);
+					int64_to_numericvar(tmp, &tmp_var);
+					add_var(dest, &tmp_var, dest);
+
+					/* Result will overflow if weight overflows int16 */
+					if (dest->weight > SHRT_MAX)
+						goto out_of_range;
+
+					/* Begin a new group */
+					tmp = 0;
+					mul = 1;
+				}
+
+				tmp = tmp * 2 + (*cp++ - '0');
+				mul = mul * 2;
+			}
+			else
+				break;
+		}
+	}
+	else
+		/* Should never happen; treat as invalid input */
+		goto invalid_syntax;
+
+	/* Check that we got at least one digit */
+	if (unlikely(cp == firstdigit))
+		goto invalid_syntax;
+
+	/* Add the contribution from the final group of digits */
+	int64_to_numericvar(mul, &tmp_var);
+	mul_var(dest, &tmp_var, dest, 0);
+	int64_to_numericvar(tmp, &tmp_var);
+	add_var(dest, &tmp_var, dest);
+
+	if (dest->weight > SHRT_MAX)
+		goto out_of_range;
+
+	dest->sign = sign;
+
+	free_var(&tmp_var);
+
+	/* Return end+1 position for caller */
+	*endptr = cp;
+
+	return true;
+
+out_of_range:
+	ereturn(escontext, false,
+			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+			 errmsg("value overflows numeric format")));
+
+invalid_syntax:
+	ereturn(escontext, false,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for type %s: \"%s\"",
+					"numeric", str)));
+}
+
+
 /*
  * set_var_from_num() -
  *
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 30a5613ed7..4eccc2086d 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -2144,6 +2144,12 @@ INSERT INTO num_input_test(n1) VALUES (' -inf ');
 INSERT INTO num_input_test(n1) VALUES (' Infinity ');
 INSERT INTO num_input_test(n1) VALUES (' +inFinity ');
 INSERT INTO num_input_test(n1) VALUES (' -INFINITY ');
+INSERT INTO num_input_test(n1) VALUES ('0b10001110111100111100001001010');
+INSERT INTO num_input_test(n1) VALUES ('  -0B1010101101010100101010011000110011101011000111110000101011010010  ');
+INSERT INTO num_input_test(n1) VALUES ('  +0o112402761777 ');
+INSERT INTO num_input_test(n1) VALUES ('-0O001255245230633431670261');
+INSERT INTO num_input_test(n1) VALUES ('-0x0000000000000000000000000deadbeef');
+INSERT INTO num_input_test(n1) VALUES (' 0X30b1F33a6DF0bD4E64DF9BdA7D15 ');
 -- bad inputs
 INSERT INTO num_input_test(n1) VALUES ('     ');
 ERROR:  invalid input syntax for type numeric: "     "
@@ -2181,23 +2187,41 @@ INSERT INTO num_input_test(n1) VALUES ('+ infinity');
 ERROR:  invalid input syntax for type numeric: "+ infinity"
 LINE 1: INSERT INTO num_input_test(n1) VALUES ('+ infinity');
                                                ^
+INSERT INTO num_input_test(n1) VALUES ('0b1112');
+ERROR:  invalid input syntax for type numeric: "0b1112"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0b1112');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('0c1112');
+ERROR:  invalid input syntax for type numeric: "0c1112"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0c1112');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('0x12.34');
+ERROR:  invalid input syntax for type numeric: "0x12.34"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0x12.34');
+                                               ^
 SELECT * FROM num_input_test;
-    n1     
------------
-       123
-   3245874
-    -93853
-    555.50
-   -555.50
-       NaN
-       NaN
-  Infinity
-  Infinity
- -Infinity
-  Infinity
-  Infinity
- -Infinity
-(13 rows)
+                n1                 
+-----------------------------------
+                               123
+                           3245874
+                            -93853
+                            555.50
+                           -555.50
+                               NaN
+                               NaN
+                          Infinity
+                          Infinity
+                         -Infinity
+                          Infinity
+                          Infinity
+                         -Infinity
+                         299792458
+             -12345678901234567890
+                        9999999999
+             -12345678900987654321
+                       -3735928559
+ 987654321234567898765432123456789
+(19 rows)
 
 -- Also try it with non-error-throwing API
 SELECT pg_input_is_valid('34.5', 'numeric');
@@ -2242,6 +2266,12 @@ SELECT pg_input_error_message('1234.567', 'numeric(7,4)');
  numeric field overflow
 (1 row)
 
+SELECT pg_input_error_message('0x1234.567', 'numeric');
+               pg_input_error_message                
+-----------------------------------------------------
+ invalid input syntax for type numeric: "0x1234.567"
+(1 row)
+
 --
 -- Test precision and scale typemods
 --
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index 15cd6b1672..deb26d31c3 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -104,9 +104,11 @@ SELECT 0b111111111111111111111111111111111111111111111111111111111111111;
 (1 row)
 
 SELECT 0b1000000000000000000000000000000000000000000000000000000000000000;
-ERROR:  invalid input syntax for type numeric: "0b1000000000000000000000000000000000000000000000000000000000000000"
-LINE 1: SELECT 0b100000000000000000000000000000000000000000000000000...
-               ^
+      ?column?       
+---------------------
+ 9223372036854775808
+(1 row)
+
 SELECT 0o777777777777777777777;
       ?column?       
 ---------------------
@@ -114,9 +116,11 @@ SELECT 0o777777777777777777777;
 (1 row)
 
 SELECT 0o1000000000000000000000;
-ERROR:  invalid input syntax for type numeric: "0o1000000000000000000000"
-LINE 1: SELECT 0o1000000000000000000000;
-               ^
+      ?column?       
+---------------------
+ 9223372036854775808
+(1 row)
+
 SELECT 0x7FFFFFFFFFFFFFFF;
       ?column?       
 ---------------------
@@ -124,9 +128,11 @@ SELECT 0x7FFFFFFFFFFFFFFF;
 (1 row)
 
 SELECT 0x8000000000000000;
-ERROR:  invalid input syntax for type numeric: "0x8000000000000000"
-LINE 1: SELECT 0x8000000000000000;
-               ^
+      ?column?       
+---------------------
+ 9223372036854775808
+(1 row)
+
 SELECT -0b1000000000000000000000000000000000000000000000000000000000000000;
        ?column?       
 ----------------------
@@ -134,9 +140,11 @@ SELECT -0b1000000000000000000000000000000000000000000000000000000000000000;
 (1 row)
 
 SELECT -0b1000000000000000000000000000000000000000000000000000000000000001;
-ERROR:  invalid input syntax for type numeric: "-0b1000000000000000000000000000000000000000000000000000000000000001"
-LINE 1: SELECT -0b10000000000000000000000000000000000000000000000000...
-               ^
+       ?column?       
+----------------------
+ -9223372036854775809
+(1 row)
+
 SELECT -0o1000000000000000000000;
        ?column?       
 ----------------------
@@ -144,9 +152,11 @@ SELECT -0o1000000000000000000000;
 (1 row)
 
 SELECT -0o1000000000000000000001;
-ERROR:  invalid input syntax for type numeric: "-0o1000000000000000000001"
-LINE 1: SELECT -0o1000000000000000000001;
-               ^
+       ?column?       
+----------------------
+ -9223372036854775809
+(1 row)
+
 SELECT -0x8000000000000000;
        ?column?       
 ----------------------
@@ -154,9 +164,11 @@ SELECT -0x8000000000000000;
 (1 row)
 
 SELECT -0x8000000000000001;
-ERROR:  invalid input syntax for type numeric: "-0x8000000000000001"
-LINE 1: SELECT -0x8000000000000001;
-               ^
+       ?column?       
+----------------------
+ -9223372036854775809
+(1 row)
+
 -- error cases
 SELECT 123abc;
 ERROR:  trailing junk after numeric literal at or near "123a"
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index 7bb34e5021..b04652e38b 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -1039,6 +1039,12 @@ INSERT INTO num_input_test(n1) VALUES (' -inf ');
 INSERT INTO num_input_test(n1) VALUES (' Infinity ');
 INSERT INTO num_input_test(n1) VALUES (' +inFinity ');
 INSERT INTO num_input_test(n1) VALUES (' -INFINITY ');
+INSERT INTO num_input_test(n1) VALUES ('0b10001110111100111100001001010');
+INSERT INTO num_input_test(n1) VALUES ('  -0B1010101101010100101010011000110011101011000111110000101011010010  ');
+INSERT INTO num_input_test(n1) VALUES ('  +0o112402761777 ');
+INSERT INTO num_input_test(n1) VALUES ('-0O001255245230633431670261');
+INSERT INTO num_input_test(n1) VALUES ('-0x0000000000000000000000000deadbeef');
+INSERT INTO num_input_test(n1) VALUES (' 0X30b1F33a6DF0bD4E64DF9BdA7D15 ');
 
 -- bad inputs
 INSERT INTO num_input_test(n1) VALUES ('     ');
@@ -1050,6 +1056,9 @@ INSERT INTO num_input_test(n1) VALUES ('5. 0   ');
 INSERT INTO num_input_test(n1) VALUES ('');
 INSERT INTO num_input_test(n1) VALUES (' N aN ');
 INSERT INTO num_input_test(n1) VALUES ('+ infinity');
+INSERT INTO num_input_test(n1) VALUES ('0b1112');
+INSERT INTO num_input_test(n1) VALUES ('0c1112');
+INSERT INTO num_input_test(n1) VALUES ('0x12.34');
 
 SELECT * FROM num_input_test;
 
@@ -1061,6 +1070,7 @@ SELECT pg_input_error_message('1e400000', 'numeric');
 SELECT pg_input_is_valid('1234.567', 'numeric(8,4)');
 SELECT pg_input_is_valid('1234.567', 'numeric(7,4)');
 SELECT pg_input_error_message('1234.567', 'numeric(7,4)');
+SELECT pg_input_error_message('0x1234.567', 'numeric');
 
 --
 -- Test precision and scale typemods
-- 
2.35.3

From fc354a7d0bfeca3b565f71f39416a9c5c99700fb Mon Sep 17 00:00:00 2001
From: Dean Rasheed <dean.a.rash...@gmail.com>
Date: Fri, 13 Jan 2023 09:33:58 +0000
Subject: [PATCH 2/2] Add underscore support to type numeric.

XXX: No parser support for such inputs yet. Merge with Peter's patch?
---
 src/backend/utils/adt/numeric.c       | 106 ++++++++++++++++++++------
 src/test/regress/expected/numeric.out |  62 ++++++++++++++-
 src/test/regress/sql/numeric.sql      |  22 +++++-
 3 files changed, 160 insertions(+), 30 deletions(-)

diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index ed592841dc..3ef4541c0f 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -6924,10 +6924,7 @@ set_var_from_str(const char *str, const char *cp,
 	}
 
 	if (!isdigit((unsigned char) *cp))
-		ereturn(escontext, false,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type %s: \"%s\"",
-						"numeric", str)));
+		goto invalid_syntax;
 
 	decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
 
@@ -6948,12 +6945,19 @@ set_var_from_str(const char *str, const char *cp,
 		else if (*cp == '.')
 		{
 			if (have_dp)
-				ereturn(escontext, false,
-						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-						 errmsg("invalid input syntax for type %s: \"%s\"",
-								"numeric", str)));
+				goto invalid_syntax;
 			have_dp = true;
 			cp++;
+			/* decimal point must not be followed by underscore */
+			if (*cp == '_')
+				goto invalid_syntax;
+		}
+		else if (*cp == '_')
+		{
+			/* underscore must be followed by more digits */
+			cp++;
+			if (!isdigit((unsigned char) *cp))
+				goto invalid_syntax;
 		}
 		else
 			break;
@@ -6966,17 +6970,8 @@ set_var_from_str(const char *str, const char *cp,
 	/* Handle exponent, if any */
 	if (*cp == 'e' || *cp == 'E')
 	{
-		long		exponent;
-		char	   *endptr;
-
-		cp++;
-		exponent = strtol(cp, &endptr, 10);
-		if (endptr == cp)
-			ereturn(escontext, false,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type %s: \"%s\"",
-							"numeric", str)));
-		cp = endptr;
+		long		exponent = 0;
+		bool		neg = false;
 
 		/*
 		 * At this point, dweight and dscale can't be more than about
@@ -6986,10 +6981,43 @@ set_var_from_str(const char *str, const char *cp,
 		 * fit in storage format, make_result() will complain about it later;
 		 * for consistency use the same ereport errcode/text as make_result().
 		 */
-		if (exponent >= INT_MAX / 2 || exponent <= -(INT_MAX / 2))
-			ereturn(escontext, false,
-					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-					 errmsg("value overflows numeric format")));
+
+		/* exponent sign */
+		cp++;
+		if (*cp == '+')
+			cp++;
+		else if (*cp == '-')
+		{
+			neg = true;
+			cp++;
+		}
+
+		/* exponent digits */
+		if (!isdigit((unsigned char) *cp))
+			goto invalid_syntax;
+
+		while (*cp)
+		{
+			if (isdigit((unsigned char) *cp))
+			{
+				exponent = exponent * 10 + (*cp++ - '0');
+				if (exponent > INT_MAX / 2)
+					goto out_of_range;
+			}
+			else if (*cp == '_')
+			{
+				/* underscore must be followed by more digits */
+				cp++;
+				if (!isdigit((unsigned char) *cp))
+					goto invalid_syntax;
+			}
+			else
+				break;
+		}
+
+		if (neg)
+			exponent = -exponent;
+
 		dweight += (int) exponent;
 		dscale -= (int) exponent;
 		if (dscale < 0)
@@ -7041,6 +7069,17 @@ set_var_from_str(const char *str, const char *cp,
 	*endptr = cp;
 
 	return true;
+
+out_of_range:
+	ereturn(escontext, false,
+			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+			 errmsg("value overflows numeric format")));
+
+invalid_syntax:
+	ereturn(escontext, false,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for type %s: \"%s\"",
+					"numeric", str)));
 }
 
 
@@ -7118,6 +7157,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
 				tmp = tmp * 16 + xdigit_value(*cp++);
 				mul = mul * 16;
 			}
+			else if (*cp == '_')
+			{
+				/* Underscore must be followed by more digits */
+				cp++;
+				if (!isxdigit((unsigned char) *cp))
+					goto invalid_syntax;
+			}
 			else
 				break;
 		}
@@ -7148,6 +7194,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
 				tmp = tmp * 8 + (*cp++ - '0');
 				mul = mul * 8;
 			}
+			else if (*cp == '_')
+			{
+				/* Underscore must be followed by more digits */
+				cp++;
+				if (*cp < '0' || *cp > '7')
+					goto invalid_syntax;
+			}
 			else
 				break;
 		}
@@ -7178,6 +7231,13 @@ set_var_from_non_decimal_integer_str(const char *str, const char *cp, int sign,
 				tmp = tmp * 2 + (*cp++ - '0');
 				mul = mul * 2;
 			}
+			else if (*cp == '_')
+			{
+				/* Underscore must be followed by more digits */
+				cp++;
+				if (*cp < '0' || *cp > '1')
+					goto invalid_syntax;
+			}
 			else
 				break;
 		}
diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out
index 4eccc2086d..a3fe3c771a 100644
--- a/src/test/regress/expected/numeric.out
+++ b/src/test/regress/expected/numeric.out
@@ -2144,12 +2144,17 @@ INSERT INTO num_input_test(n1) VALUES (' -inf ');
 INSERT INTO num_input_test(n1) VALUES (' Infinity ');
 INSERT INTO num_input_test(n1) VALUES (' +inFinity ');
 INSERT INTO num_input_test(n1) VALUES (' -INFINITY ');
+INSERT INTO num_input_test(n1) VALUES ('12_000_000_000');
+INSERT INTO num_input_test(n1) VALUES ('12_000.123_456');
+INSERT INTO num_input_test(n1) VALUES ('23_000_000_000e-1_0');
+INSERT INTO num_input_test(n1) VALUES ('.000_000_000_123e1_0');
+INSERT INTO num_input_test(n1) VALUES ('.000_000_000_123e+1_1');
 INSERT INTO num_input_test(n1) VALUES ('0b10001110111100111100001001010');
-INSERT INTO num_input_test(n1) VALUES ('  -0B1010101101010100101010011000110011101011000111110000101011010010  ');
+INSERT INTO num_input_test(n1) VALUES ('  -0B_1010_1011_0101_0100_1010_1001_1000_1100_1110_1011_0001_1111_0000_1010_1101_0010  ');
 INSERT INTO num_input_test(n1) VALUES ('  +0o112402761777 ');
-INSERT INTO num_input_test(n1) VALUES ('-0O001255245230633431670261');
+INSERT INTO num_input_test(n1) VALUES ('-0O0012_5524_5230_6334_3167_0261');
 INSERT INTO num_input_test(n1) VALUES ('-0x0000000000000000000000000deadbeef');
-INSERT INTO num_input_test(n1) VALUES (' 0X30b1F33a6DF0bD4E64DF9BdA7D15 ');
+INSERT INTO num_input_test(n1) VALUES (' 0X_30b1_F33a_6DF0_bD4E_64DF_9BdA_7D15 ');
 -- bad inputs
 INSERT INTO num_input_test(n1) VALUES ('     ');
 ERROR:  invalid input syntax for type numeric: "     "
@@ -2187,6 +2192,38 @@ INSERT INTO num_input_test(n1) VALUES ('+ infinity');
 ERROR:  invalid input syntax for type numeric: "+ infinity"
 LINE 1: INSERT INTO num_input_test(n1) VALUES ('+ infinity');
                                                ^
+INSERT INTO num_input_test(n1) VALUES ('_123');
+ERROR:  invalid input syntax for type numeric: "_123"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('_123');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('123_');
+ERROR:  invalid input syntax for type numeric: "123_"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('123_');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('12__34');
+ERROR:  invalid input syntax for type numeric: "12__34"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('12__34');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('123_.456');
+ERROR:  invalid input syntax for type numeric: "123_.456"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('123_.456');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('123._456');
+ERROR:  invalid input syntax for type numeric: "123._456"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('123._456');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('1.2e_34');
+ERROR:  invalid input syntax for type numeric: "1.2e_34"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('1.2e_34');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('1.2e34_');
+ERROR:  invalid input syntax for type numeric: "1.2e34_"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('1.2e34_');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('1.2e3__4');
+ERROR:  invalid input syntax for type numeric: "1.2e3__4"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('1.2e3__4');
+                                               ^
 INSERT INTO num_input_test(n1) VALUES ('0b1112');
 ERROR:  invalid input syntax for type numeric: "0b1112"
 LINE 1: INSERT INTO num_input_test(n1) VALUES ('0b1112');
@@ -2199,6 +2236,18 @@ INSERT INTO num_input_test(n1) VALUES ('0x12.34');
 ERROR:  invalid input syntax for type numeric: "0x12.34"
 LINE 1: INSERT INTO num_input_test(n1) VALUES ('0x12.34');
                                                ^
+INSERT INTO num_input_test(n1) VALUES ('0x__1234');
+ERROR:  invalid input syntax for type numeric: "0x__1234"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0x__1234');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('0x1234_');
+ERROR:  invalid input syntax for type numeric: "0x1234_"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0x1234_');
+                                               ^
+INSERT INTO num_input_test(n1) VALUES ('0x12__34');
+ERROR:  invalid input syntax for type numeric: "0x12__34"
+LINE 1: INSERT INTO num_input_test(n1) VALUES ('0x12__34');
+                                               ^
 SELECT * FROM num_input_test;
                 n1                 
 -----------------------------------
@@ -2215,13 +2264,18 @@ SELECT * FROM num_input_test;
                           Infinity
                           Infinity
                          -Infinity
+                       12000000000
+                      12000.123456
+                      2.3000000000
+                              1.23
+                              12.3
                          299792458
              -12345678901234567890
                         9999999999
              -12345678900987654321
                        -3735928559
  987654321234567898765432123456789
-(19 rows)
+(24 rows)
 
 -- Also try it with non-error-throwing API
 SELECT pg_input_is_valid('34.5', 'numeric');
diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql
index b04652e38b..9c160e7d0d 100644
--- a/src/test/regress/sql/numeric.sql
+++ b/src/test/regress/sql/numeric.sql
@@ -1039,12 +1039,17 @@ INSERT INTO num_input_test(n1) VALUES (' -inf ');
 INSERT INTO num_input_test(n1) VALUES (' Infinity ');
 INSERT INTO num_input_test(n1) VALUES (' +inFinity ');
 INSERT INTO num_input_test(n1) VALUES (' -INFINITY ');
+INSERT INTO num_input_test(n1) VALUES ('12_000_000_000');
+INSERT INTO num_input_test(n1) VALUES ('12_000.123_456');
+INSERT INTO num_input_test(n1) VALUES ('23_000_000_000e-1_0');
+INSERT INTO num_input_test(n1) VALUES ('.000_000_000_123e1_0');
+INSERT INTO num_input_test(n1) VALUES ('.000_000_000_123e+1_1');
 INSERT INTO num_input_test(n1) VALUES ('0b10001110111100111100001001010');
-INSERT INTO num_input_test(n1) VALUES ('  -0B1010101101010100101010011000110011101011000111110000101011010010  ');
+INSERT INTO num_input_test(n1) VALUES ('  -0B_1010_1011_0101_0100_1010_1001_1000_1100_1110_1011_0001_1111_0000_1010_1101_0010  ');
 INSERT INTO num_input_test(n1) VALUES ('  +0o112402761777 ');
-INSERT INTO num_input_test(n1) VALUES ('-0O001255245230633431670261');
+INSERT INTO num_input_test(n1) VALUES ('-0O0012_5524_5230_6334_3167_0261');
 INSERT INTO num_input_test(n1) VALUES ('-0x0000000000000000000000000deadbeef');
-INSERT INTO num_input_test(n1) VALUES (' 0X30b1F33a6DF0bD4E64DF9BdA7D15 ');
+INSERT INTO num_input_test(n1) VALUES (' 0X_30b1_F33a_6DF0_bD4E_64DF_9BdA_7D15 ');
 
 -- bad inputs
 INSERT INTO num_input_test(n1) VALUES ('     ');
@@ -1056,9 +1061,20 @@ INSERT INTO num_input_test(n1) VALUES ('5. 0   ');
 INSERT INTO num_input_test(n1) VALUES ('');
 INSERT INTO num_input_test(n1) VALUES (' N aN ');
 INSERT INTO num_input_test(n1) VALUES ('+ infinity');
+INSERT INTO num_input_test(n1) VALUES ('_123');
+INSERT INTO num_input_test(n1) VALUES ('123_');
+INSERT INTO num_input_test(n1) VALUES ('12__34');
+INSERT INTO num_input_test(n1) VALUES ('123_.456');
+INSERT INTO num_input_test(n1) VALUES ('123._456');
+INSERT INTO num_input_test(n1) VALUES ('1.2e_34');
+INSERT INTO num_input_test(n1) VALUES ('1.2e34_');
+INSERT INTO num_input_test(n1) VALUES ('1.2e3__4');
 INSERT INTO num_input_test(n1) VALUES ('0b1112');
 INSERT INTO num_input_test(n1) VALUES ('0c1112');
 INSERT INTO num_input_test(n1) VALUES ('0x12.34');
+INSERT INTO num_input_test(n1) VALUES ('0x__1234');
+INSERT INTO num_input_test(n1) VALUES ('0x1234_');
+INSERT INTO num_input_test(n1) VALUES ('0x12__34');
 
 SELECT * FROM num_input_test;
 
-- 
2.35.3

Re: Non-decimal integer literals

Reply via email to