small cleanup: unify scanstr() functions

John Naylor Thu, 01 Oct 2020 05:10:03 -0700

Hi,

Looking at the GUC file code (guc-file.l), GUC_scanstr() is almost the
same as the exported function scanstr(), except that the latter requires
finicky extra coding around double quotes, both in its callers and when
creating the input.


In the attached patch, the GUC_scanstr() function body is moved to
scansup.c to replace scanstr(), but with a different order of
if-statements to make the diff smaller. Since we have control over what
goes in the BKI file, we can use single-quoted escape strings there,
allowing removal of the special-case code for double quotes.

--
John Naylor                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services

From 93fce2a68674e1f3ec8999b15c8526535e12fffa Mon Sep 17 00:00:00 2001
From: John Naylor <[email protected]>
Date: Mon, 14 Sep 2020 11:29:43 -0400
Subject: [PATCH] Unify scanstr() implementations

Use single-quoted escaped strings in the BKI file, the same as those
used for GUC parameters. Rewrite scanstr() to match GUC_scanstr() and
remove the latter. Get rid of special case coding around double quotes
both in the callers of scanstr() and while formatting the input.
---
 doc/src/sgml/bki.sgml               |  6 +-
 src/backend/bootstrap/bootscanner.l |  9 +--
 src/backend/catalog/genbki.pl       |  6 +-
 src/backend/parser/scansup.c        | 34 +++++-----
 src/backend/utils/misc/guc-file.l   | 97 +----------------------------
 src/bin/initdb/initdb.c             | 34 +++-------
 6 files changed, 35 insertions(+), 151 deletions(-)

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 4e696d1d3e..b7f0bccf35 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -752,7 +752,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
    next token that syntactically cannot belong to the preceding
    command starts a new one.  (Usually you would put a new command on
    a new line, for clarity.)  Tokens can be certain key words, special
-   characters (parentheses, commas, etc.), numbers, or double-quoted
+   characters (parentheses, commas, etc.), numbers, or single-quoted
    strings.  Everything is case sensitive.
   </para>
 
@@ -876,7 +876,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
      <para>
       NULL values can be specified using the special key word
       <literal>_null_</literal>.  Values that do not look like
-      identifiers or digit strings must be double quoted.
+      identifiers or digit strings must be single-quoted.
      </para>
     </listitem>
    </varlistentry>
@@ -1046,7 +1046,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
 <programlisting>
 create test_table 420 (oid = oid, cola = int4, colb = text)
 open test_table
-insert ( 421 1 "value1" )
+insert ( 421 1 'value 1' )
 insert ( 422 2 _null_ )
 close test_table
 </programlisting>
diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l
index 1048e70d05..452dc067d0 100644
--- a/src/backend/bootstrap/bootscanner.l
+++ b/src/backend/bootstrap/bootscanner.l
@@ -66,7 +66,7 @@ static int	yyline = 1;			/* line number for error reporting */
 
 
 id		[-A-Za-z0-9_]+
-sid		\"([^\"])*\"
+sid		\'([^']|\'\')*\'
 
 /*
  * Keyword tokens return the keyword text (as a constant string) in yylval.kw,
@@ -120,14 +120,11 @@ NOT				{ yylval.kw = "NOT"; return XNOT; }
 NULL			{ yylval.kw = "NULL"; return XNULL; }
 
 {id}			{
-					yylval.str = scanstr(yytext);
+					yylval.str = pstrdup(yytext);
 					return ID;
 				}
 {sid}			{
-					/* leading and trailing quotes are not passed to scanstr */
-					yytext[strlen(yytext) - 1] = '\0';
-					yylval.str = scanstr(yytext+1);
-					yytext[strlen(yytext)] = '"';	/* restore yytext */
+					yylval.str = scanstr(yytext);
 					return ID;
 				}
 
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index dc5f442397..ef3105af44 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -845,17 +845,15 @@ sub print_bki_insert
 		# since that represents a NUL char in C code.
 		$bki_value = '' if $bki_value eq '\0';
 
-		# Handle single quotes by doubling them, and double quotes by
-		# converting them to octal escapes, because that's what the
+		# Handle single quotes by doubling them, because that's what the
 		# bootstrap scanner requires.  We do not process backslashes
 		# specially; this allows escape-string-style backslash escapes
 		# to be used in catalog data.
 		$bki_value =~ s/'/''/g;
-		$bki_value =~ s/"/\\042/g;
 
 		# Quote value if needed.  We need not quote values that satisfy
 		# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
-		$bki_value = sprintf(qq'"%s"', $bki_value)
+		$bki_value = sprintf("'%s'", $bki_value)
 		  if length($bki_value) == 0
 		  or $bki_value =~ /[^-A-Za-z0-9_]/;
 
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index cac70d5df7..f35ee91d64 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -23,8 +23,8 @@
 /* ----------------
  *		scanstr
  *
- * if the string passed in has escaped codes, map the escape codes to actual
- * chars
+ * Strip the quotes surrounding the given string, and collapse any embedded
+ * '' sequences and backslash escapes.
  *
  * the string returned is palloc'd and should eventually be pfree'd by the
  * caller!
@@ -39,25 +39,23 @@ scanstr(const char *s)
 				i,
 				j;
 
-	if (s == NULL || s[0] == '\0')
-		return pstrdup("");
-
+	Assert(s != NULL && s[0] == '\'');
 	len = strlen(s);
+	Assert(len >= 2);
+	Assert(s[len - 1] == '\'');
+
+	/* Skip the leading quote; we'll handle the trailing quote below */
+	s++, len--;
 
-	newStr = palloc(len + 1);	/* string cannot get longer */
+	/* Since len still includes trailing quote, this is enough space */
+	newStr = palloc(len);
 
 	for (i = 0, j = 0; i < len; i++)
 	{
-		if (s[i] == '\'')
+		if (s[i] == '\'' && s[i + 1] == '\'')
 		{
-			/*
-			 * Note: if scanner is working right, unescaped quotes can only
-			 * appear in pairs, so there should be another character.
-			 */
-			i++;
-			/* The bootstrap parser is not as smart, so check here. */
-			Assert(s[i] == '\'');
-			newStr[j] = s[i];
+			/* doubled quote becomes just one quote */
+			newStr[j] = s[++i];
 		}
 		else if (s[i] == '\\')
 		{
@@ -108,7 +106,11 @@ scanstr(const char *s)
 			newStr[j] = s[i];
 		j++;
 	}
-	newStr[j] = '\0';
+
+	/* We copied the ending quote to newStr, so replace with \0 */
+	Assert(j > 0 && j <= len);
+	newStr[--j] = '\0';
+
 	return newStr;
 }
 
diff --git a/src/backend/utils/misc/guc-file.l b/src/backend/utils/misc/guc-file.l
index 268b745528..a73f0171b0 100644
--- a/src/backend/utils/misc/guc-file.l
+++ b/src/backend/utils/misc/guc-file.l
@@ -16,6 +16,7 @@
 
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "parser/scansup.h"
 #include "storage/fd.h"
 #include "utils/guc.h"
 
@@ -55,7 +56,6 @@ static void record_config_file_error(const char *errmsg,
 						 ConfigVariable **tail_p);
 
 static int	GUC_flex_fatal(const char *msg);
-static char *GUC_scanstr(const char *s);
 
 /* LCOV_EXCL_START */
 
@@ -797,7 +797,7 @@ ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
 			token != GUC_UNQUOTED_STRING)
 			goto parse_error;
 		if (token == GUC_STRING)	/* strip quotes and escapes */
-			opt_value = GUC_scanstr(yytext);
+			opt_value = scanstr(yytext);
 		else
 			opt_value = pstrdup(yytext);
 
@@ -1129,96 +1129,3 @@ FreeConfigVariable(ConfigVariable *item)
 		pfree(item->filename);
 	pfree(item);
 }
-
-
-/*
- *		scanstr
- *
- * Strip the quotes surrounding the given string, and collapse any embedded
- * '' sequences and backslash escapes.
- *
- * the string returned is palloc'd and should eventually be pfree'd by the
- * caller.
- */
-static char *
-GUC_scanstr(const char *s)
-{
-	char	   *newStr;
-	int			len,
-				i,
-				j;
-
-	Assert(s != NULL && s[0] == '\'');
-	len = strlen(s);
-	Assert(len >= 2);
-	Assert(s[len - 1] == '\'');
-
-	/* Skip the leading quote; we'll handle the trailing quote below */
-	s++, len--;
-
-	/* Since len still includes trailing quote, this is enough space */
-	newStr = palloc(len);
-
-	for (i = 0, j = 0; i < len; i++)
-	{
-		if (s[i] == '\\')
-		{
-			i++;
-			switch (s[i])
-			{
-				case 'b':
-					newStr[j] = '\b';
-					break;
-				case 'f':
-					newStr[j] = '\f';
-					break;
-				case 'n':
-					newStr[j] = '\n';
-					break;
-				case 'r':
-					newStr[j] = '\r';
-					break;
-				case 't':
-					newStr[j] = '\t';
-					break;
-				case '0':
-				case '1':
-				case '2':
-				case '3':
-				case '4':
-				case '5':
-				case '6':
-				case '7':
-					{
-						int			k;
-						long		octVal = 0;
-
-						for (k = 0;
-							 s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
-							 k++)
-							octVal = (octVal << 3) + (s[i + k] - '0');
-						i += k - 1;
-						newStr[j] = ((char) octVal);
-					}
-					break;
-				default:
-					newStr[j] = s[i];
-					break;
-			}					/* switch */
-		}
-		else if (s[i] == '\'' && s[i + 1] == '\'')
-		{
-			/* doubled quote becomes just one quote */
-			newStr[j] = s[++i];
-		}
-		else
-			newStr[j] = s[i];
-		j++;
-	}
-
-	/* We copied the ending quote to newStr, so replace with \0 */
-	Assert(j > 0 && j <= len);
-	newStr[--j] = '\0';
-
-	return newStr;
-}
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 37e0d7ceab..e4d45f5b2f 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -331,12 +331,9 @@ escape_quotes(const char *src)
 
 /*
  * Escape a field value to be inserted into the BKI data.
- * Here, we first run the value through escape_quotes (which
- * will be inverted by the backend's scanstr() function) and
- * then overlay special processing of double quotes, which
- * bootscanner.l will only accept as data if converted to octal
- * representation ("\042").  We always wrap the value in double
- * quotes, even if that isn't strictly necessary.
+ * Run the value through escape_quotes (which will be inverted
+ * by the backend's scanstr() function), then wrap the value in
+ * single quotes, even if that isn't strictly necessary.
  */
 static char *
 escape_quotes_bki(const char *src)
@@ -345,30 +342,13 @@ escape_quotes_bki(const char *src)
 	char	   *data = escape_quotes(src);
 	char	   *resultp;
 	char	   *datap;
-	int			nquotes = 0;
 
-	/* count double quotes in data */
-	datap = data;
-	while ((datap = strchr(datap, '"')) != NULL)
-	{
-		nquotes++;
-		datap++;
-	}
-
-	result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);
+	result = (char *) pg_malloc(strlen(data) + 3);
 	resultp = result;
-	*resultp++ = '"';
+	*resultp++ = '\'';
 	for (datap = data; *datap; datap++)
-	{
-		if (*datap == '"')
-		{
-			strcpy(resultp, "\\042");
-			resultp += 4;
-		}
-		else
-			*resultp++ = *datap;
-	}
-	*resultp++ = '"';
+		*resultp++ = *datap;
+	*resultp++ = '\'';
 	*resultp = '\0';
 
 	free(data);
-- 
2.22.0

small cleanup: unify scanstr() functions

Reply via email to