From 1773d4f089d21e9b8dedf9c290f1ef1b1de026b9 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Sun, 2 May 2021 20:40:20 +0300
Subject: [PATCH] Make websearch_to_tsquery() parse text in quotes as a single
 token

---
 src/backend/utils/adt/tsquery.c       | 81 ++++++++-------------------
 src/test/regress/expected/tsearch.out | 24 +++++---
 src/test/regress/sql/tsearch.sql      |  1 +
 3 files changed, 39 insertions(+), 67 deletions(-)

diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index fe4470174f5..addf65b3525 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -77,7 +77,6 @@ struct TSQueryParserStateData
 	char	   *buf;			/* current scan point */
 	int			count;			/* nesting count, incremented by (,
 								 * decremented by ) */
-	bool		in_quotes;		/* phrase in quotes "" */
 	ts_parserstate state;
 
 	/* polish (prefix) notation in list, filled in by push* functions */
@@ -235,9 +234,6 @@ parse_or_operator(TSQueryParserState pstate)
 {
 	char	   *ptr = pstate->buf;
 
-	if (pstate->in_quotes)
-		return false;
-
 	/* it should begin with "OR" literal */
 	if (pg_strncasecmp(ptr, "or", 2) != 0)
 		return false;
@@ -398,38 +394,29 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 					state->buf++;
 					state->state = WAITOPERAND;
 
-					if (state->in_quotes)
-						continue;
-
 					*operator = OP_NOT;
 					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, '"'))
 				{
+					/* Everything in quotes is processed as a single token */
+
+					/* skip opening quote */
 					state->buf++;
+					*strval = state->buf;
 
-					if (!state->in_quotes)
-					{
-						state->state = WAITOPERAND;
+					/* iterate to the closing quote or end of the string */
+					while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
+						state->buf++;
+					*lenval = state->buf - *strval;
 
-						if (strchr(state->buf, '"'))
-						{
-							/* quoted text should be ordered <-> */
-							state->in_quotes = true;
-							return PT_OPEN;
-						}
+					/* skip closing quote if not end of the string */
+					if (*state->buf != '\0')
+						state->buf++;
 
-						/* web search tolerates missing quotes */
-						continue;
-					}
-					else
-					{
-						/* we have to provide an operand */
-						state->in_quotes = false;
-						state->state = WAITOPERATOR;
-						pushStop(state);
-						return PT_CLOSE;
-					}
+					state->state = WAITOPERATOR;
+					state->count++;
+					return PT_VAL;
 				}
 				else if (ISOPERATOR(state->buf))
 				{
@@ -467,24 +454,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 			case WAITOPERATOR:
 				if (t_iseq(state->buf, '"'))
 				{
-					if (!state->in_quotes)
-					{
-						/*
-						 * put implicit AND after an operand and handle this
-						 * quote in WAITOPERAND
-						 */
-						state->state = WAITOPERAND;
-						*operator = OP_AND;
-						return PT_OPR;
-					}
-					else
-					{
-						state->buf++;
-
-						/* just close quotes */
-						state->in_quotes = false;
-						return PT_CLOSE;
-					}
+					/*
+					 * put implicit AND after an operand and handle this
+					 * quote in WAITOPERAND
+					 */
+					state->state = WAITOPERAND;
+					*operator = OP_AND;
+					return PT_OPR;
 				}
 				else if (parse_or_operator(state))
 				{
@@ -498,18 +474,8 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
 				}
 				else if (!t_isspace(state->buf))
 				{
-					if (state->in_quotes)
-					{
-						/* put implicit <-> after an operand */
-						*operator = OP_PHRASE;
-						*weight = 1;
-					}
-					else
-					{
-						/* put implicit AND after an operand */
-						*operator = OP_AND;
-					}
-
+					/* put implicit AND after an operand */
+					*operator = OP_AND;
 					state->state = WAITOPERAND;
 					return PT_OPR;
 				}
@@ -846,7 +812,6 @@ parse_tsquery(char *buf,
 	state.buffer = buf;
 	state.buf = buf;
 	state.count = 0;
-	state.in_quotes = false;
 	state.state = WAITFIRSTOPERAND;
 	state.polstr = NIL;
 
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 4ae62320c9f..45b92a63388 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -2678,9 +2678,9 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
 
 -- test quotes
 select websearch_to_tsquery('english', '"pg_class pg');
-  websearch_to_tsquery   
--------------------------
- 'pg' <-> 'class' & 'pg'
+   websearch_to_tsquery    
+---------------------------
+ 'pg' <-> 'class' <-> 'pg'
 (1 row)
 
 select websearch_to_tsquery('english', 'pg_class pg"');
@@ -2695,6 +2695,12 @@ select websearch_to_tsquery('english', '"pg_class pg"');
  'pg' <-> 'class' <-> 'pg'
 (1 row)
 
+select websearch_to_tsquery('english', '"pg_class : pg"');
+   websearch_to_tsquery    
+---------------------------
+ 'pg' <-> 'class' <-> 'pg'
+(1 row)
+
 select websearch_to_tsquery('english', 'abc "pg_class pg"');
        websearch_to_tsquery        
 -----------------------------------
@@ -2708,15 +2714,15 @@ select websearch_to_tsquery('english', '"pg_class pg" def');
 (1 row)
 
 select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
-                  websearch_to_tsquery                  
---------------------------------------------------------
- 'abc' & 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' & 'def'
+                websearch_to_tsquery                
+----------------------------------------------------
+ 'abc' & 'pg' <-> 'pg' <-> 'class' <-> 'pg' & 'def'
 (1 row)
 
 select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
-          websearch_to_tsquery          
-----------------------------------------
- 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg'
+        websearch_to_tsquery        
+------------------------------------
+ 'pg' <-> 'pg' <-> 'class' <-> 'pg'
 (1 row)
 
 select websearch_to_tsquery('english', '""pg pg_class pg""');
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index b02ed73f6a8..d929210998a 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -759,6 +759,7 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
 select websearch_to_tsquery('english', '"pg_class pg');
 select websearch_to_tsquery('english', 'pg_class pg"');
 select websearch_to_tsquery('english', '"pg_class pg"');
+select websearch_to_tsquery('english', '"pg_class : pg"');
 select websearch_to_tsquery('english', 'abc "pg_class pg"');
 select websearch_to_tsquery('english', '"pg_class pg" def');
 select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
-- 
2.24.3 (Apple Git-128)

