Attached is a patch to add nondecimal integer literals and underscores in numeric literals to the SQL JSON path language. This matches the recent additions to the core SQL syntax. It follows ECMAScript in combination with the current SQL draft.

Internally, all the numeric literal parsing of jsonpath goes through numeric_in, which already supports all this, so this patch is just a bit of lexer work and some tests.
From abeefa990231dea398ddd923d9e992e0ad945159 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 27 Feb 2023 19:27:32 +0100
Subject: [PATCH v1] SQL JSON path enhanced numeric literals

Add support for non-decimal integer literals and underscores in
numeric literals to SQL JSON path language.  This follows the rules of
ECMAScript, as referred to by the SQL standard.
---
 src/backend/catalog/sql_features.txt   |   1 +
 src/backend/utils/adt/jsonpath_scan.l  |  59 ++++++---
 src/test/regress/expected/jsonpath.out | 162 +++++++++++++++++++++++++
 src/test/regress/sql/jsonpath.sql      |  50 ++++++++
 4 files changed, 258 insertions(+), 14 deletions(-)

diff --git a/src/backend/catalog/sql_features.txt 
b/src/backend/catalog/sql_features.txt
index 75a09f14e0..a8300ad694 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -553,6 +553,7 @@ T836        SQL/JSON path language: starts with predicate   
                YES
 T837   SQL/JSON path language: regex_like predicate                    YES     
 T838   JSON_TABLE: PLAN DEFAULT clause                 NO      
 T839   Formatted cast of datetimes to/from character strings                   
NO      
+T840   Hex integer literals in SQL/JSON path language                  NO      
SQL:202x draft
 M001   Datalinks                       NO      
 M002   Datalinks via SQL/CLI                   NO      
 M003   Datalinks via Embedded SQL                      NO      
diff --git a/src/backend/utils/adt/jsonpath_scan.l 
b/src/backend/utils/adt/jsonpath_scan.l
index e08b1c7cd7..cd550ad5d8 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -90,21 +90,31 @@ blank               [ \t\n\r\f]
 /* "other" means anything that's not special, blank, or '\' or '"' */
 other          [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
 
-digit          [0-9]
-integer                (0|[1-9]{digit}*)
-decimal                ({integer}\.{digit}*|\.{digit}+)
-real           ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail       ({integer}|{decimal})[Ee][-+]
-
-integer_junk   {integer}{other}
+decdigit       [0-9]
+hexdigit       [0-9A-Fa-f]
+octdigit       [0-7]
+bindigit       [0-1]
+
+/* DecimalInteger in ECMAScript; must not start with 0 unless it's exactly 0 */
+decinteger     (0|[1-9](_?{decdigit})*)
+/* DecimalDigits in ECMAScript; only used as part of other rules */
+decdigits      {decdigit}(_?{decdigit})*
+hexinteger     0[xX]{hexdigit}(_?{hexdigit})*
+octinteger     0[oO]{octdigit}(_?{octdigit})*
+bininteger     0[bB]{bindigit}(_?{bindigit})*
+
+decimal                ({decinteger}\.{decdigits}?|\.{decdigits})
+real           ({decinteger}|{decimal})[Ee][-+]?{decdigits}
+realfail       ({decinteger}|{decimal})[Ee][-+]
+
+decinteger_junk        {decinteger}{other}
 decimal_junk   {decimal}{other}
 real_junk              {real}{other}
 
-hex_dig                [0-9A-Fa-f]
-unicode                \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
-unicodefail    \\u({hex_dig}{0,3}|\{{hex_dig}{0,6})
-hex_char       \\x{hex_dig}{2}
-hex_fail       \\x{hex_dig}{0,1}
+unicode                \\u({hexdigit}{4}|\{{hexdigit}{1,6}\})
+unicodefail    \\u({hexdigit}{0,3}|\{{hexdigit}{0,6})
+hex_char       \\x{hexdigit}{2}
+hex_fail       \\x{hexdigit}{0,1}
 
 %%
 
@@ -274,7 +284,28 @@ hex_fail   \\x{hex_dig}{0,1}
                                                                        return 
NUMERIC_P;
                                                                }
 
-{integer}                                              {
+{decinteger}                                   {
+                                                                       
addstring(true, yytext, yyleng);
+                                                                       
addchar(false, '\0');
+                                                                       
yylval->str = scanstring;
+                                                                       return 
INT_P;
+                                                               }
+
+{hexinteger}                                   {
+                                                                       
addstring(true, yytext, yyleng);
+                                                                       
addchar(false, '\0');
+                                                                       
yylval->str = scanstring;
+                                                                       return 
INT_P;
+                                                               }
+
+{octinteger}                                   {
+                                                                       
addstring(true, yytext, yyleng);
+                                                                       
addchar(false, '\0');
+                                                                       
yylval->str = scanstring;
+                                                                       return 
INT_P;
+                                                               }
+
+{bininteger}                                   {
                                                                        
addstring(true, yytext, yyleng);
                                                                        
addchar(false, '\0');
                                                                        
yylval->str = scanstring;
@@ -287,7 +318,7 @@ hex_fail    \\x{hex_dig}{0,1}
                                                                                
"invalid numeric literal");
                                                                        
yyterminate();
                                                                }
-{integer_junk}                                 {
+{decinteger_junk}                              {
                                                                        
jsonpath_yyerror(
                                                                                
NULL, escontext,
                                                                                
"trailing junk after numeric literal");
diff --git a/src/test/regress/expected/jsonpath.out 
b/src/test/regress/expected/jsonpath.out
index ca0cdf1ab2..3eda8e3a8b 100644
--- a/src/test/regress/expected/jsonpath.out
+++ b/src/test/regress/expected/jsonpath.out
@@ -836,6 +836,7 @@ select '$ ? (@.a < +10.1e+1)'::jsonpath;
  $?(@."a" < 101)
 (1 row)
 
+-- numeric literals
 select '0'::jsonpath;
  jsonpath 
 ----------
@@ -846,6 +847,10 @@ select '00'::jsonpath;
 ERROR:  trailing junk after numeric literal at or near "00" of jsonpath input
 LINE 1: select '00'::jsonpath;
                ^
+select '0755'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0755'::jsonpath;
+               ^
 select '0.0'::jsonpath;
  jsonpath 
 ----------
@@ -1032,6 +1037,163 @@ select '1?(2>3)'::jsonpath;
  (1)?(2 > 3)
 (1 row)
 
+-- nondecimal
+select '0b100101'::jsonpath;
+ jsonpath 
+----------
+ 37
+(1 row)
+
+select '0o273'::jsonpath;
+ jsonpath 
+----------
+ 187
+(1 row)
+
+select '0x42F'::jsonpath;
+ jsonpath 
+----------
+ 1071
+(1 row)
+
+-- error cases
+select '0b'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "0b" of jsonpath input
+LINE 1: select '0b'::jsonpath;
+               ^
+select '1b'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1b" of jsonpath input
+LINE 1: select '1b'::jsonpath;
+               ^
+select '0b0x'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0b0x'::jsonpath;
+               ^
+select '0o'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "0o" of jsonpath input
+LINE 1: select '0o'::jsonpath;
+               ^
+select '1o'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1o" of jsonpath input
+LINE 1: select '1o'::jsonpath;
+               ^
+select '0o0x'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0o0x'::jsonpath;
+               ^
+select '0x'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "0x" of jsonpath input
+LINE 1: select '0x'::jsonpath;
+               ^
+select '1x'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1x" of jsonpath input
+LINE 1: select '1x'::jsonpath;
+               ^
+select '0x0y'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0x0y'::jsonpath;
+               ^
+-- underscores
+select '1_000_000'::jsonpath;
+ jsonpath 
+----------
+ 1000000
+(1 row)
+
+select '1_2_3'::jsonpath;
+ jsonpath 
+----------
+ 123
+(1 row)
+
+select '0x1EEE_FFFF'::jsonpath;
+ jsonpath  
+-----------
+ 518979583
+(1 row)
+
+select '0o2_73'::jsonpath;
+ jsonpath 
+----------
+ 187
+(1 row)
+
+select '0b10_0101'::jsonpath;
+ jsonpath 
+----------
+ 37
+(1 row)
+
+select '1_000.000_005'::jsonpath;
+  jsonpath   
+-------------
+ 1000.000005
+(1 row)
+
+select '1_000.'::jsonpath;
+ jsonpath 
+----------
+ 1000
+(1 row)
+
+select '.000_005'::jsonpath;
+ jsonpath 
+----------
+ 0.000005
+(1 row)
+
+select '1_000.5e0_1'::jsonpath;
+ jsonpath 
+----------
+ 10005
+(1 row)
+
+-- error cases
+select '_100'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '_100'::jsonpath;
+               ^
+select '100_'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "100_" of jsonpath input
+LINE 1: select '100_'::jsonpath;
+               ^
+select '100__000'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '100__000'::jsonpath;
+               ^
+select '_1_000.5'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '_1_000.5'::jsonpath;
+               ^
+select '1_000_.5'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1_000_" of jsonpath 
input
+LINE 1: select '1_000_.5'::jsonpath;
+               ^
+select '1_000._5'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1_000._" of jsonpath 
input
+LINE 1: select '1_000._5'::jsonpath;
+               ^
+select '1_000.5_'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1_000.5_" of jsonpath 
input
+LINE 1: select '1_000.5_'::jsonpath;
+               ^
+select '1_000.5e_1'::jsonpath;
+ERROR:  trailing junk after numeric literal at or near "1_000.5e" of jsonpath 
input
+LINE 1: select '1_000.5e_1'::jsonpath;
+               ^
+-- underscore after prefix not allowed in JavaScript (but allowed in SQL)
+select '0b_10_0101'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0b_10_0101'::jsonpath;
+               ^
+select '0o_273'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0o_273'::jsonpath;
+               ^
+select '0x_42F'::jsonpath;
+ERROR:  syntax error at end of jsonpath input
+LINE 1: select '0x_42F'::jsonpath;
+               ^
 -- test non-error-throwing API
 SELECT str as jsonpath,
        pg_input_is_valid(str,'jsonpath') as ok,
diff --git a/src/test/regress/sql/jsonpath.sql 
b/src/test/regress/sql/jsonpath.sql
index 99d21d2af7..214d6bc1d5 100644
--- a/src/test/regress/sql/jsonpath.sql
+++ b/src/test/regress/sql/jsonpath.sql
@@ -152,8 +152,11 @@
 select '$ ? (@.a < -10.1e+1)'::jsonpath;
 select '$ ? (@.a < +10.1e+1)'::jsonpath;
 
+-- numeric literals
+
 select '0'::jsonpath;
 select '00'::jsonpath;
+select '0755'::jsonpath;
 select '0.0'::jsonpath;
 select '0.000'::jsonpath;
 select '0.000e1'::jsonpath;
@@ -188,6 +191,53 @@
 select '(1.).e3'::jsonpath;
 select '1?(2>3)'::jsonpath;
 
+-- nondecimal
+select '0b100101'::jsonpath;
+select '0o273'::jsonpath;
+select '0x42F'::jsonpath;
+
+-- error cases
+select '0b'::jsonpath;
+select '1b'::jsonpath;
+select '0b0x'::jsonpath;
+
+select '0o'::jsonpath;
+select '1o'::jsonpath;
+select '0o0x'::jsonpath;
+
+select '0x'::jsonpath;
+select '1x'::jsonpath;
+select '0x0y'::jsonpath;
+
+-- underscores
+select '1_000_000'::jsonpath;
+select '1_2_3'::jsonpath;
+select '0x1EEE_FFFF'::jsonpath;
+select '0o2_73'::jsonpath;
+select '0b10_0101'::jsonpath;
+
+select '1_000.000_005'::jsonpath;
+select '1_000.'::jsonpath;
+select '.000_005'::jsonpath;
+select '1_000.5e0_1'::jsonpath;
+
+-- error cases
+select '_100'::jsonpath;
+select '100_'::jsonpath;
+select '100__000'::jsonpath;
+
+select '_1_000.5'::jsonpath;
+select '1_000_.5'::jsonpath;
+select '1_000._5'::jsonpath;
+select '1_000.5_'::jsonpath;
+select '1_000.5e_1'::jsonpath;
+
+-- underscore after prefix not allowed in JavaScript (but allowed in SQL)
+select '0b_10_0101'::jsonpath;
+select '0o_273'::jsonpath;
+select '0x_42F'::jsonpath;
+
+
 -- test non-error-throwing API
 
 SELECT str as jsonpath,

base-commit: 46647cc4b86aa529f50b343a6a8cdb8f187ec8da
-- 
2.39.2

Reply via email to