On Sat, Jan 14, 2023 at 6:14 AM Gurjeet Singh <gurj...@singh.im> wrote:
>
> I agree that an identifier _surrounded_ by the same token (e.g. #foo#)
> or the pairing token (e.g. {foo}) looks better aesthetically, so I am
> okay with any of the following variations of the scheme, as well:
>
> \#foo\#  (tested; works)
> \#foo#   (not tested; reduces ident length by 1)
>
> We can choose a different character, instead of #. Perhaps \{foo} !

Please find attached the patch that uses \{foo} styled Named
Operators. This is in line with Tom's reluctant hint at possibly using
curly braces as delimiter characters. Since the curly braces are used
by the SQL Specification for row pattern recognition, this patch
proposes escaping the first of the curly braces.

We can get rid of the leading backslash if (a) we're confident that
the SQL committee will not use curly braces anywhere else, and (b)
we're confident that if/when Postgres supports the Row Pattern
Recognition feature, we'll be able to treat curly braces inside the
PATTERN clause specially. Since both of those conditions are unlikely,
I think we must settle for the escaped-first-curly-brace style for
naming our operators.

In keeping with the previous posts, here's a sample SQL script showing
what the proposed syntax will look like in action. Personally, I
prefer the \#foo style, since the \# prefix stands out in the text
better than \{..} does, and because the # character is a better signal
of an operator than {.

create operator \{add_point}
    (function = box_add, leftarg = box, rightarg = point);
create table test(a box);
insert into test values('((0,0),(1,1))'), ('((0,0),(2,1))');
select a as original, a \{add_point} '(1,1)' as modified from test;
drop operator \{add_point}(box, point);

Best regards,
Gurjeet
http://Gurje.et
diff --git a/src/backend/catalog/pg_operator.c b/src/backend/catalog/pg_operator.c
index 1017f2eed1..c5b8562cb5 100644
--- a/src/backend/catalog/pg_operator.c
+++ b/src/backend/catalog/pg_operator.c
@@ -31,6 +31,7 @@
 #include "catalog/pg_type.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
+#include "parser/scansup.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/lsyscache.h"
@@ -79,6 +80,10 @@ validOperatorName(const char *name)
 	if (len == 0 || len >= NAMEDATALEN)
 		return false;
 
+	/* Is this a Named Operator? */
+	if (validNamedOperator(name))
+		return true;
+
 	/* Can't contain any invalid characters */
 	/* Test string here should match op_chars in scan.l */
 	if (strspn(name, "~!@#^&|`?+-*/%<>=") != len)
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index db8b0fe8eb..8587b82c8d 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -379,6 +379,16 @@ self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+
 
+/*
+ * Named Operators, e.g. \{foo}
+ *
+ * {namedopfailed*} are error rules to avoid scanner backup when
+ * {namedop} fails to match its trailing tokens.
+ */
+namedop			\\\{{identifier}\}
+namedopfailed1	\\\{{identifier}
+namedopfailed2	\\\{
+
 /*
  * Numbers
  *
@@ -768,6 +778,28 @@ other			.
 				}
 <xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); }

+{namedop}		{
+					/* Named Operator such as \{foo}: returned as an Op token */
+					SET_YYLLOC();
+					if (yyleng >= NAMEDATALEN)
+						yyerror("operator name too long");
+					/* XXX Should we support double-quoted, case sensitive names? */
+					yylval->str = downcase_identifier(yytext, yyleng, false, false);
+					return Op;
+				}
+
+{namedopfailed1}	{
+					/* Set the location first, as other rules do, so the error points here */
+					SET_YYLLOC();
+					yyerror("unexpected token");
+				}
+
+{namedopfailed2}	{
+					/* Set the location first, as other rules do, so the error points here */
+					SET_YYLLOC();
+					yyerror("unexpected token");
+				}
+
 {xdstart}		{
 					SET_YYLLOC();
 					BEGIN(xd);
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index 602108a40f..05c46ae09e 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -125,3 +125,70 @@ scanner_isspace(char ch)
 		return true;
 	return false;
 }
+
+/*
+ * validNamedOperator() -- return true if name adheres to the scanner rule
+ * {namedop}
+ */
+bool
+validNamedOperator(const char *name)
+{
+	size_t	len = strlen(name);
+	bool	valid_identifier;
+	char   *tmp;
+
+	if (len < 4 || len >= NAMEDATALEN)
+	   return false;
+
+	if (name[0] != '\\' || name[1] != '{' || name[len-1] != '}')
+		return false;
+
+	tmp = pstrdup(name);
+
+	/* Cut off the closing brace and skip the two-byte opener; check only the identifier */
+	tmp[len-1] = '\0';
+	valid_identifier = validIdentifier(tmp + 2);
+	pfree(tmp);
+
+	return valid_identifier;
+}
+
+/*
+ * validIdentifier() -- return true if name adheres to the scanner rule
+ * {identifier}
+ *
+ * Note: this function does not check if the identifier length
+ * is less than NAMEDATALEN.
+ */
+bool
+validIdentifier(const char *name)
+{
+	uint8	c;
+	size_t	i, len = strlen(name);
+
+	/* First byte must be in ident_start; this also rejects an empty string */
+	c = name[0];
+	if ( !(c == '_'
+		|| (c >='A' && c <= 'Z')
+		|| (c >='a' && c <= 'z')
+		|| (c >= 0200 && c <= 0377)))
+	{
+		return false;
+	}
+
+	/* Each remaining byte must be in ident_cont: ident_start plus digits and $ */
+	for (i = 1; i < len; ++i)
+	{
+		c = name[i];
+		if ( !(c == '_' || c == '$'
+			|| (c >='A' && c <= 'Z')
+			|| (c >='a' && c <= 'z')
+			|| (c >='0' && c <= '9')
+			|| (c >= 0200 && c <= 0377)))
+		{
+			return false;
+		}
+	}
+
+	return true;
+}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index ae531ec240..98a2561886 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -317,6 +317,16 @@ self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+
 
+/*
+ * Named Operators, e.g. \{foo}
+ *
+ * {namedopfailed*} are error rules to avoid scanner backup when
+ * {namedop} fails to match its trailing tokens.
+ */
+namedop			\\\{{identifier}\}
+namedopfailed1	\\\{{identifier}
+namedopfailed2	\\\{
+
 /*
  * Numbers
  *
@@ -570,6 +580,18 @@ other			.
 					ECHO;
 				}
 
+{namedop}		{
+					ECHO;
+				}
+
+{namedopfailed1}	{
+					ECHO;
+				}
+
+{namedopfailed2}	{
+					ECHO;
+				}
+
 {xdstart}		{
 					BEGIN(xd);
 					ECHO;
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index ff65224bf6..0f6aff8b44 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -24,4 +24,7 @@ extern void truncate_identifier(char *ident, int len, bool warn);
 
 extern bool scanner_isspace(char ch);
 
+extern bool validNamedOperator(const char *name);
+extern bool validIdentifier(const char *name);
+
 #endif							/* SCANSUP_H */

Reply via email to