From 54fc726cf6244b3b88a430bc9de1a67ecb52e718 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Thu, 1 Oct 2020 13:40:37 -0400
Subject: [PATCH v2] Remove duplicate scanstr() function

This function was only used by the bootstrap scanner, but its location
implied it was also used by the core scanner. In addition, it seems
to have been designed to work with single-quoted strings, since it
expects literal single quotes in the string value to be escaped by
doubling. Despite this, it was only used on double-quoted strings,
requiring some finicky coding both in the scanner and when emitting
BKI data.

Instead, use single-quoted escaped strings in the BKI data, the same as
those used for GUC parameters. Remove scanstr() and rename GUC_scanstr()
to reflect what it's actually doing.
---
 doc/src/sgml/bki.sgml               |  6 +-
 src/backend/bootstrap/bootscanner.l | 12 ++--
 src/backend/catalog/genbki.pl       |  6 +-
 src/backend/parser/scansup.c        | 92 -----------------------------
 src/backend/utils/misc/guc-file.l   |  9 ++-
 src/bin/initdb/initdb.c             | 34 +++--------
 src/include/parser/scansup.h        |  5 +-
 src/include/utils/guc.h             |  1 +
 8 files changed, 23 insertions(+), 142 deletions(-)

diff --git a/doc/src/sgml/bki.sgml b/doc/src/sgml/bki.sgml
index 4e696d1d3e..b7f0bccf35 100644
--- a/doc/src/sgml/bki.sgml
+++ b/doc/src/sgml/bki.sgml
@@ -752,7 +752,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
    next token that syntactically cannot belong to the preceding
    command starts a new one.  (Usually you would put a new command on
    a new line, for clarity.)  Tokens can be certain key words, special
-   characters (parentheses, commas, etc.), numbers, or double-quoted
+   characters (parentheses, commas, etc.), numbers, or single-quoted
    strings.  Everything is case sensitive.
   </para>
 
@@ -876,7 +876,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
      <para>
       NULL values can be specified using the special key word
       <literal>_null_</literal>.  Values that do not look like
-      identifiers or digit strings must be double quoted.
+      identifiers or digit strings must be single-quoted.
      </para>
     </listitem>
    </varlistentry>
@@ -1046,7 +1046,7 @@ $ perl  rewrite_dat_with_prokind.pl  pg_proc.dat
 <programlisting>
 create test_table 420 (oid = oid, cola = int4, colb = text)
 open test_table
-insert ( 421 1 "value1" )
+insert ( 421 1 'value 1' )
 insert ( 422 2 _null_ )
 close test_table
 </programlisting>
diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l
index 1048e70d05..fde4ef1131 100644
--- a/src/backend/bootstrap/bootscanner.l
+++ b/src/backend/bootstrap/bootscanner.l
@@ -27,12 +27,12 @@
 #include "nodes/parsenodes.h"
 #include "nodes/pg_list.h"
 #include "nodes/primnodes.h"
-#include "parser/scansup.h"
 #include "rewrite/prs2lock.h"
 #include "storage/block.h"
 #include "storage/fd.h"
 #include "storage/itemptr.h"
 #include "storage/off.h"
+#include "utils/guc.h"		/* needed for DeescapeQuotedString */
 #include "utils/rel.h"
 
 /* Not needed now that this file is compiled as part of bootparse. */
@@ -66,7 +66,7 @@ static int	yyline = 1;			/* line number for error reporting */
 
 
 id		[-A-Za-z0-9_]+
-sid		\"([^\"])*\"
+sid		\'([^']|\'\')*\'
 
 /*
  * Keyword tokens return the keyword text (as a constant string) in yylval.kw,
@@ -120,14 +120,12 @@ NOT				{ yylval.kw = "NOT"; return XNOT; }
 NULL			{ yylval.kw = "NULL"; return XNULL; }
 
 {id}			{
-					yylval.str = scanstr(yytext);
+					yylval.str = pstrdup(yytext);
 					return ID;
 				}
 {sid}			{
-					/* leading and trailing quotes are not passed to scanstr */
-					yytext[strlen(yytext) - 1] = '\0';
-					yylval.str = scanstr(yytext+1);
-					yytext[strlen(yytext)] = '"';	/* restore yytext */
+					/* strip quotes and escapes */
+					yylval.str = DeescapeQuotedString(yytext);
 					return ID;
 				}
 
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index dc5f442397..ef3105af44 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -845,17 +845,15 @@ sub print_bki_insert
 		# since that represents a NUL char in C code.
 		$bki_value = '' if $bki_value eq '\0';
 
-		# Handle single quotes by doubling them, and double quotes by
-		# converting them to octal escapes, because that's what the
+		# Handle single quotes by doubling them, because that's what the
 		# bootstrap scanner requires.  We do not process backslashes
 		# specially; this allows escape-string-style backslash escapes
 		# to be used in catalog data.
 		$bki_value =~ s/'/''/g;
-		$bki_value =~ s/"/\\042/g;
 
 		# Quote value if needed.  We need not quote values that satisfy
 		# the "id" pattern in bootscanner.l, currently "[-A-Za-z0-9_]+".
-		$bki_value = sprintf(qq'"%s"', $bki_value)
+		$bki_value = sprintf("'%s'", $bki_value)
 		  if length($bki_value) == 0
 		  or $bki_value =~ /[^-A-Za-z0-9_]/;
 
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index cac70d5df7..021f234360 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -20,98 +20,6 @@
 #include "mb/pg_wchar.h"
 #include "parser/scansup.h"
 
-/* ----------------
- *		scanstr
- *
- * if the string passed in has escaped codes, map the escape codes to actual
- * chars
- *
- * the string returned is palloc'd and should eventually be pfree'd by the
- * caller!
- * ----------------
- */
-
-char *
-scanstr(const char *s)
-{
-	char	   *newStr;
-	int			len,
-				i,
-				j;
-
-	if (s == NULL || s[0] == '\0')
-		return pstrdup("");
-
-	len = strlen(s);
-
-	newStr = palloc(len + 1);	/* string cannot get longer */
-
-	for (i = 0, j = 0; i < len; i++)
-	{
-		if (s[i] == '\'')
-		{
-			/*
-			 * Note: if scanner is working right, unescaped quotes can only
-			 * appear in pairs, so there should be another character.
-			 */
-			i++;
-			/* The bootstrap parser is not as smart, so check here. */
-			Assert(s[i] == '\'');
-			newStr[j] = s[i];
-		}
-		else if (s[i] == '\\')
-		{
-			i++;
-			switch (s[i])
-			{
-				case 'b':
-					newStr[j] = '\b';
-					break;
-				case 'f':
-					newStr[j] = '\f';
-					break;
-				case 'n':
-					newStr[j] = '\n';
-					break;
-				case 'r':
-					newStr[j] = '\r';
-					break;
-				case 't':
-					newStr[j] = '\t';
-					break;
-				case '0':
-				case '1':
-				case '2':
-				case '3':
-				case '4':
-				case '5':
-				case '6':
-				case '7':
-					{
-						int			k;
-						long		octVal = 0;
-
-						for (k = 0;
-							 s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
-							 k++)
-							octVal = (octVal << 3) + (s[i + k] - '0');
-						i += k - 1;
-						newStr[j] = ((char) octVal);
-					}
-					break;
-				default:
-					newStr[j] = s[i];
-					break;
-			}					/* switch */
-		}						/* s[i] == '\\' */
-		else
-			newStr[j] = s[i];
-		j++;
-	}
-	newStr[j] = '\0';
-	return newStr;
-}
-
 
 /*
  * downcase_truncate_identifier() --- do appropriate downcasing and
diff --git a/src/backend/utils/misc/guc-file.l b/src/backend/utils/misc/guc-file.l
index 268b745528..85202ab7eb 100644
--- a/src/backend/utils/misc/guc-file.l
+++ b/src/backend/utils/misc/guc-file.l
@@ -55,7 +55,6 @@ static void record_config_file_error(const char *errmsg,
 						 ConfigVariable **tail_p);
 
 static int	GUC_flex_fatal(const char *msg);
-static char *GUC_scanstr(const char *s);
 
 /* LCOV_EXCL_START */
 
@@ -797,7 +796,7 @@ ParseConfigFp(FILE *fp, const char *config_file, int depth, int elevel,
 			token != GUC_UNQUOTED_STRING)
 			goto parse_error;
 		if (token == GUC_STRING)	/* strip quotes and escapes */
-			opt_value = GUC_scanstr(yytext);
+			opt_value = DeescapeQuotedString(yytext);
 		else
 			opt_value = pstrdup(yytext);
 
@@ -1132,7 +1131,7 @@ FreeConfigVariable(ConfigVariable *item)
 
 
 /*
- *		scanstr
+ *		DeescapeQuotedString
  *
  * Strip the quotes surrounding the given string, and collapse any embedded
  * '' sequences and backslash escapes.
@@ -1140,8 +1139,8 @@ FreeConfigVariable(ConfigVariable *item)
  * the string returned is palloc'd and should eventually be pfree'd by the
  * caller.
  */
-static char *
-GUC_scanstr(const char *s)
+char *
+DeescapeQuotedString(const char *s)
 {
 	char	   *newStr;
 	int			len,
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 118b282d1c..ee3bfa82f4 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -331,12 +331,9 @@ escape_quotes(const char *src)
 
 /*
  * Escape a field value to be inserted into the BKI data.
- * Here, we first run the value through escape_quotes (which
- * will be inverted by the backend's scanstr() function) and
- * then overlay special processing of double quotes, which
- * bootscanner.l will only accept as data if converted to octal
- * representation ("\042").  We always wrap the value in double
- * quotes, even if that isn't strictly necessary.
+ * Run the value through escape_quotes (which will be inverted
+ * by the backend's DeescapeQuotedString() function), then wrap
+ * the value in single quotes, even if that isn't strictly necessary.
  */
 static char *
 escape_quotes_bki(const char *src)
@@ -345,30 +342,13 @@ escape_quotes_bki(const char *src)
 	char	   *data = escape_quotes(src);
 	char	   *resultp;
 	char	   *datap;
-	int			nquotes = 0;
 
-	/* count double quotes in data */
-	datap = data;
-	while ((datap = strchr(datap, '"')) != NULL)
-	{
-		nquotes++;
-		datap++;
-	}
-
-	result = (char *) pg_malloc(strlen(data) + 3 + nquotes * 3);
+	result = (char *) pg_malloc(strlen(data) + 3);
 	resultp = result;
-	*resultp++ = '"';
+	*resultp++ = '\'';
 	for (datap = data; *datap; datap++)
-	{
-		if (*datap == '"')
-		{
-			strcpy(resultp, "\\042");
-			resultp += 4;
-		}
-		else
-			*resultp++ = *datap;
-	}
-	*resultp++ = '"';
+		*resultp++ = *datap;
+	*resultp++ = '\'';
 	*resultp = '\0';
 
 	free(data);
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 7a6ee529ae..5bc426660d 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -1,8 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * scansup.h
- *	  scanner support routines.  used by both the bootstrap lexer
- * as well as the normal lexer
+ *	  scanner support routines used by the core lexer
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -15,8 +14,6 @@
 #ifndef SCANSUP_H
 #define SCANSUP_H
 
-extern char *scanstr(const char *s);
-
 extern char *downcase_truncate_identifier(const char *ident, int len,
 										  bool warn);
 
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 2819282181..073c8f3e06 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -155,6 +155,7 @@ extern bool ParseConfigDirectory(const char *includedir,
 								 ConfigVariable **head_p,
 								 ConfigVariable **tail_p);
 extern void FreeConfigVariables(ConfigVariable *list);
+extern char *DeescapeQuotedString(const char *s);
 
 /*
  * The possible values of an enum variable are specified by an array of
-- 
2.22.0

