Hi Aleksandr,

> I agree with Aleksander about silencing all errors in websearch_to_tsquery().

> In the attachment is a revised patch with the attempt to introduce an
> ability to ignore syntax errors in gettoken_tsvector().

Thanks for the further improvements! Yes, you're both right, the API has to be consistent. Unfortunately, I had to make some adjustments according to Oleg Bartunov's review. Here's a change log:

1. &, |, () and <-> are no longer considered operators in web search mode.
2. I've stumbled upon a bug: web search used to transform "pg_class" into 'pg <-> class', which is no longer the case.
3. I changed the behavior of gettoken_tsvector() as soon as I had heard from Aleksander Alekseev, so I decided to use my implementation in this revision of the patch. This is a good subject for discussion, though. Feel free to share your opinion.
4. As suggested by Theodor, I've replaced some bool args with bit flags.

> The name of enum ts_parsestate looks more like a name of the function
> than a name of a type.
> In my version, it renamed to QueryParserState, but you can fix it if I'm wrong.

True, but gettoken_query() returns ts_tokentype, so I decided to use this naming scheme.

--
Dmitry Ivanov
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a8..6055fb6b4e 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  false);
+						  0);
 
 	PG_RETURN_TSQUERY(query);
 }
@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  P_TSQ_PLAIN);
 
 	PG_RETURN_POINTER(query);
 }
@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
 	query = parse_tsquery(text_to_cstring(in),
 						  pushval_morph,
 						  PointerGetDatum(&data),
-						  true);
+						  P_TSQ_PLAIN);
 
 	PG_RETURN_TSQUERY(query);
 }
@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
 										ObjectIdGetDatum(cfgId),
 										PointerGetDatum(in)));
 }
+
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(1);
+	MorphOpaque	data;
+	TSQuery		query = NULL;
+
+	data.cfg_id = PG_GETARG_OID(0);
+
+	data.qoperator = OP_AND;
+
+	query = parse_tsquery(text_to_cstring(in),
+						  pushval_morph,
+						  PointerGetDatum(&data),
+						  P_TSQ_WEB);
+
+	PG_RETURN_TSQUERY(query);
+}
+
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *in = PG_GETARG_TEXT_PP(0);
+	Oid			cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
+										ObjectIdGetDatum(cfgId),
+										PointerGetDatum(in)));
+
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 1ccbf79030..695bdb89e9 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -32,14 +32,27 @@ const int	tsearch_op_priority[OP_COUNT] =
 	3							/* OP_PHRASE */
 };
 
+/*
+ * parser's states
+ */
+typedef enum
+{
+	WAITOPERAND = 1,
+	WAITOPERATOR = 2,
+	WAITFIRSTOPERAND = 3,
+	WAITSINGLEOPERAND = 4
+} ts_parserstate;
+
 struct TSQueryParserStateData
 {
 	/* State for gettoken_query */
 	char	   *buffer;			/* entire string we are scanning */
 	char	   *buf;			/* current scan point */
-	int			state;
 	int			count;			/* nesting count, incremented by (,
 								 * decremented by ) */
+	bool		in_quotes;		/* phrase in quotes "" */
+	bool		is_web;			/* is it a web search? */
+	ts_parserstate state;
 
 	/* polish (prefix) notation in list, filled in by push* functions */
 	List	   *polstr;
@@ -57,12 +70,6 @@ struct TSQueryParserStateData
 	TSVectorParseState valstate;
 };
 
-/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR	2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
-
 /*
  * subroutine to parse the modifiers (weight and prefix flag currently)
  * part, like ':AB*' of a query.
@@ -197,6 +204,26 @@ err:
 	return buf;
 }
 
+/*
+ * Parse OR operator used in websearch_to_tsquery().
+ */
+static bool
+parse_or_operator(TSQueryParserState state)
+{
+	char *buf = state->buf;
+
+	if (state->in_quotes)
+		return false;
+
+	return (t_iseq(&buf[0], 'o') || t_iseq(&buf[0], 'O')) &&
+		   (t_iseq(&buf[1], 'r') || t_iseq(&buf[1], 'R')) &&
+		   (buf[2] != '\0' &&
+				!t_iseq(&buf[2], '-') &&
+				!t_iseq(&buf[2], '_') &&
+				!t_isalpha(&buf[2]) &&
+				!t_isdigit(&buf[2]));
+}
+
 /*
  * token types for parsing
  */
@@ -219,10 +246,12 @@ typedef enum
  *
  */
 static ts_tokentype
-gettoken_query(TSQueryParserState state,
-			   int8 *operator,
-			   int *lenval, char **strval, int16 *weight, bool *prefix)
+gettoken_query(TSQueryParserState state, int8 *operator,
+			   int *lenval, char **strval,
+			   int16 *weight, bool *prefix)
 {
+	bool is_web = state->is_web;
+
 	*weight = 0;
 	*prefix = false;
 
@@ -232,28 +261,63 @@ gettoken_query(TSQueryParserState state,
 		{
 			case WAITFIRSTOPERAND:
 			case WAITOPERAND:
-				if (t_iseq(state->buf, '!'))
+				if ((! is_web && t_iseq(state->buf, '!')) ||
+					(is_web && t_iseq(state->buf, '-')))
 				{
-					(state->buf)++; /* can safely ++, t_iseq guarantee that
-									 * pg_mblen()==1 */
+					state->buf++;
+
+					if (state->in_quotes)
+						continue;
+
 					*operator = OP_NOT;
 					state->state = WAITOPERAND;
 					return PT_OPR;
 				}
 				else if (t_iseq(state->buf, '('))
 				{
+					state->buf++;
+
+					if (is_web)
+						continue;
+
 					state->count++;
-					(state->buf)++;
 					state->state = WAITOPERAND;
 					return PT_OPEN;
 				}
 				else if (t_iseq(state->buf, ':'))
 				{
+					state->buf++;
+
+					if (is_web)
+						continue;
+
 					ereport(ERROR,
 							(errcode(ERRCODE_SYNTAX_ERROR),
 							 errmsg("syntax error in tsquery: \"%s\"",
 									state->buffer)));
 				}
+				else if (is_web && t_iseq(state->buf, '"'))
+				{
+					state->buf++;
+
+					/* web search tolerates missing quotes */
+					if (!state->in_quotes && strchr(state->buf, '"'))
+					{
+						/* quoted text should be ordered <-> */
+						state->in_quotes = true;
+						state->state = WAITOPERAND;
+					}
+					else
+						state->in_quotes = false;
+
+					continue;
+				}
+				else if (is_web && ISOPERATOR(state->buf))
+				{
+					/* or else gettoken_tsvector() will raise an error */
+					state->buf++;
+					continue;
+				}
 				else if (!t_isspace(state->buf))
 				{
 					/*
@@ -263,12 +327,22 @@ gettoken_query(TSQueryParserState state,
 					reset_tsvector_parser(state->valstate, state->buf);
 					if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
 					{
-						state->buf = get_modifiers(state->buf, weight, prefix);
+						if (!is_web)
+						{
+							/* web search does not support weights */
+							state->buf = get_modifiers(state->buf, weight, prefix);
+						}
 						state->state = WAITOPERATOR;
 						return PT_VAL;
 					}
 					else if (state->state == WAITFIRSTOPERAND)
 						return PT_END;
+					else if (is_web)
+					{
+						/* finally, we have to provide an operand */
+						pushStop(state);
+						return PT_END;
+					}
 					else
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
@@ -277,40 +351,95 @@ gettoken_query(TSQueryParserState state,
 				}
 				break;
 			case WAITOPERATOR:
-				if (t_iseq(state->buf, '&'))
+				if (! is_web && t_iseq(state->buf, '&'))
 				{
+					state->buf++;
 					state->state = WAITOPERAND;
 					*operator = OP_AND;
-					(state->buf)++;
 					return PT_OPR;
 				}
-				else if (t_iseq(state->buf, '|'))
+				else if (! is_web && t_iseq(state->buf, '|'))
 				{
+					state->buf++;
 					state->state = WAITOPERAND;
 					*operator = OP_OR;
-					(state->buf)++;
 					return PT_OPR;
 				}
-				else if (t_iseq(state->buf, '<'))
+				else if (! is_web && t_iseq(state->buf, '<'))
 				{
-					state->state = WAITOPERAND;
-					*operator = OP_PHRASE;
 					/* weight var is used as storage for distance */
 					state->buf = parse_phrase_operator(state->buf, weight);
+					state->state = WAITOPERAND;
+					*operator = OP_PHRASE;
 					if (*weight < 0)
 						return PT_ERR;
 					return PT_OPR;
 				}
-				else if (t_iseq(state->buf, ')'))
+				else if (! is_web && t_iseq(state->buf, ')'))
 				{
-					(state->buf)++;
+					state->buf++;
 					state->count--;
 					return (state->count < 0) ? PT_ERR : PT_CLOSE;
 				}
+				else if (is_web && t_iseq(state->buf, '"'))
+				{
+					state->buf++;
+
+					/* web search tolerates missing quotes */
+					if (!state->in_quotes && strchr(state->buf, '"'))
+					{
+						/* quoted text should be ordered <-> */
+						state->in_quotes = true;
+						state->state = WAITOPERAND;
+
+						/* put implicit AND after an operand */
+						*operator = OP_AND;
+						return PT_OPR;
+					}
+					else
+						state->in_quotes = false;
+
+					continue;
+				}
+				else if (is_web && parse_or_operator(state))
+				{
+					state->buf += 2; /* strlen("OR") */
+					state->state = WAITOPERAND;
+					*operator = OP_OR;
+					return PT_OPR;
+				}
+				else if (is_web && ISOPERATOR(state->buf))
+				{
+					/* just skip disabled operators */
+					state->buf++;
+					continue;
+				}
 				else if (*(state->buf) == '\0')
-					return (state->count) ? PT_ERR : PT_END;
+				{
+					/* web search tolerates unexpected end of line */
+					return (!is_web && state->count) ? PT_ERR : PT_END;
+				}
 				else if (!t_isspace(state->buf))
+				{
+					if (is_web)
+					{
+						if (state->in_quotes)
+						{
+							/* put implicit <-> after an operand */
+							*operator = OP_PHRASE;
+							*weight = 1;
+						}
+						else
+						{
+							/* put implicit AND after an operand */
+							*operator = OP_AND;
+						}
+
+						state->state = WAITOPERAND;
+						return PT_OPR;
+					}
 					return PT_ERR;
+				}
 				break;
 			case WAITSINGLEOPERAND:
 				if (*(state->buf) == '\0')
@@ -320,9 +449,6 @@ gettoken_query(TSQueryParserState state,
 				state->buf += strlen(state->buf);
 				state->count++;
 				return PT_VAL;
-			default:
-				return PT_ERR;
-				break;
 		}
 		state->buf += pg_mblen(state->buf);
 	}
@@ -605,7 +731,7 @@ TSQuery
 parse_tsquery(char *buf,
 			  PushFunction pushval,
 			  Datum opaque,
-			  bool isplain)
+			  int flags)
 {
 	struct TSQueryParserStateData state;
 	int			i;
@@ -613,17 +739,28 @@ parse_tsquery(char *buf,
 	int			commonlen;
 	QueryItem  *ptr;
 	ListCell   *cell;
-	bool		needcleanup;
+	bool		needcleanup,
+				is_plain,
+				is_web;
+	int			tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
+
+	is_plain = (flags & P_TSQ_PLAIN) != 0;
+	is_web = (flags & P_TSQ_WEB) != 0;
+
+	if (is_web)
+		tsv_flags |= P_TSV_IS_WEB;
 
 	/* init state */
 	state.buffer = buf;
 	state.buf = buf;
-	state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 	state.count = 0;
+	state.in_quotes = false;
+	state.is_web = is_web;
+	state.state = is_plain ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 	state.polstr = NIL;
 
 	/* init value parser's state */
-	state.valstate = init_tsvector_parser(state.buffer, true, true);
+	state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
 
 	/* init list of operand */
 	state.sumlen = 0;
@@ -716,7 +853,7 @@ tsqueryin(PG_FUNCTION_ARGS)
 {
 	char	   *in = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+	PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
 }
 
 /*
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 64e02ef434..7a27bd12a3 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS)
 	char	   *cur;
 	int			buflen = 256;	/* allocated size of tmpbuf */
 
-	state = init_tsvector_parser(buf, false, false);
+	state = init_tsvector_parser(buf, 0);
 
 	arrlen = 64;
 	arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index 7367ba6a40..fed411a842 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -33,6 +33,7 @@ struct TSVectorParseStateData
 	int			eml;			/* max bytes per character */
 	bool		oprisdelim;		/* treat ! | * ( ) as delimiters? */
 	bool		is_tsquery;		/* say "tsquery" not "tsvector" in errors? */
+	bool		is_web;			/* we're in websearch_to_tsquery() */
 };
 
 
@@ -42,7 +43,7 @@ struct TSVectorParseStateData
  * ! | & ( )
  */
 TSVectorParseState
-init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
+init_tsvector_parser(char *input, int flags)
 {
 	TSVectorParseState state;
 
@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
 	state->len = 32;
 	state->word = (char *) palloc(state->len);
 	state->eml = pg_database_encoding_max_length();
-	state->oprisdelim = oprisdelim;
-	state->is_tsquery = is_tsquery;
+	state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
+	state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
+	state->is_web = (flags & P_TSV_IS_WEB) != 0;
 
 	return state;
 }
@@ -89,16 +91,6 @@ do { \
 	} \
 } while (0)
 
-/* phrase operator begins with '<' */
-#define ISOPERATOR(x) \
-	( pg_mblen(x) == 1 && ( *(x) == '!' ||	\
-							*(x) == '&' ||	\
-							*(x) == '|' ||	\
-							*(x) == '(' ||	\
-							*(x) == ')' ||	\
-							*(x) == '<'		\
-						  ) )
-
 /* Fills gettoken_tsvector's output parameters, and returns true */
 #define RETURN_TOKEN \
 do { \
@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state,
 		{
 			if (*(state->prsbuf) == '\0')
 				return false;
-			else if (t_iseq(state->prsbuf, '\''))
+			else if (!state->is_web && t_iseq(state->prsbuf, '\''))
 				statecode = WAITENDCMPLX;
-			else if (t_iseq(state->prsbuf, '\\'))
+			else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
 			{
 				statecode = WAITNEXTCHAR;
 				oldstate = WAITENDWORD;
 			}
-			else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+			else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+					 (state->is_web && t_iseq(state->prsbuf, '"')))
 				PRSSYNTAXERROR;
 			else if (!t_isspace(state->prsbuf))
 			{
@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state,
 		}
 		else if (statecode == WAITENDWORD)
 		{
-			if (t_iseq(state->prsbuf, '\\'))
+			if (!state->is_web && t_iseq(state->prsbuf, '\\'))
 			{
 				statecode = WAITNEXTCHAR;
 				oldstate = WAITENDWORD;
 			}
 			else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
-					 (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+					 (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+					 (state->is_web && t_iseq(state->prsbuf, '"')))
 			{
 				RESIZEPRSBUF;
 				if (curpos == state->word)
@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state,
 		}
 		else if (statecode == WAITENDCMPLX)
 		{
-			if (t_iseq(state->prsbuf, '\''))
+			if (!state->is_web && t_iseq(state->prsbuf, '\''))
 			{
 				statecode = WAITCHARCMPLX;
 			}
-			else if (t_iseq(state->prsbuf, '\\'))
+			else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
 			{
 				statecode = WAITNEXTCHAR;
 				oldstate = WAITENDCMPLX;
@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state,
 		}
 		else if (statecode == WAITCHARCMPLX)
 		{
-			if (t_iseq(state->prsbuf, '\''))
+			if (!state->is_web && t_iseq(state->prsbuf, '\''))
 			{
 				RESIZEPRSBUF;
 				COPYCHAR(curpos, state->prsbuf);
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index bfc90098f8..00f1a85ae7 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4950,6 +4950,8 @@ DATA(insert OID = 3747 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s
 DESCR("transform to tsquery");
 DATA(insert OID = 5006 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8889 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f  t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 3749 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3750 (  to_tsquery		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
@@ -4958,6 +4960,8 @@ DATA(insert OID = 3751 (  plainto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s
 DESCR("transform to tsquery");
 DATA(insert OID = 5001 (  phraseto_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 8890 (  websearch_to_tsquery	PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 4209 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
 DESCR("transform jsonb to tsvector");
 DATA(insert OID = 4210 (  to_tsvector		PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f8ddce5ecb..73e969fe9c 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -25,9 +25,11 @@
 struct TSVectorParseStateData;	/* opaque struct in tsvector_parser.c */
 typedef struct TSVectorParseStateData *TSVectorParseState;
 
-extern TSVectorParseState init_tsvector_parser(char *input,
-					 bool oprisdelim,
-					 bool is_tsquery);
+#define P_TSV_OPR_IS_DELIM	(1 << 0)
+#define P_TSV_IS_TSQUERY	(1 << 1)
+#define P_TSV_IS_WEB		(1 << 2)
+
+extern TSVectorParseState init_tsvector_parser(char *input, int flags);
 extern void reset_tsvector_parser(TSVectorParseState state, char *input);
 extern bool gettoken_tsvector(TSVectorParseState state,
 				  char **token, int *len,
@@ -35,6 +37,16 @@ extern bool gettoken_tsvector(TSVectorParseState state,
 				  char **endptr);
 extern void close_tsvector_parser(TSVectorParseState state);
 
+/* phrase operator begins with '<' */
+#define ISOPERATOR(x) \
+	( pg_mblen(x) == 1 && ( *(x) == '!' ||	\
+							*(x) == '&' ||	\
+							*(x) == '|' ||	\
+							*(x) == '(' ||	\
+							*(x) == ')' ||	\
+							*(x) == '<'		\
+						  ) )
+
 /* parse_tsquery */
 
 struct TSQueryParserStateData;	/* private in backend/utils/adt/tsquery.c */
@@ -46,9 +58,13 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
 													 * QueryOperand struct */
 							  bool prefix);
 
+#define P_TSQ_PLAIN		(1 << 0)
+#define P_TSQ_WEB		(1 << 1)
+
 extern TSQuery parse_tsquery(char *buf,
-			  PushFunction pushval,
-			  Datum opaque, bool isplain);
+							 PushFunction pushval,
+							 Datum opaque,
+							 int flags);
 
 /* Functions for use by PushFunction implementations */
 extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index d63fb12f1d..2b1da308df 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1672,3 +1672,325 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
 (1 row)
 
 set enable_seqscan = on;
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
+            websearch_to_tsquery             
+---------------------------------------------
+ 'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
+ websearch_to_tsquery  
+-----------------------
+ 'orange' & 'aabbccdd'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
+          websearch_to_tsquery           
+-----------------------------------------
+ 'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat:A : cat:B');
+   websearch_to_tsquery    
+---------------------------
+ 'fat' & 'a' & 'cat' & 'b'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc : def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc:def');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'a:::b');
+ websearch_to_tsquery 
+----------------------
+ 'a' & 'b'
+(1 row)
+
+select websearch_to_tsquery('simple', ':');
+NOTICE:  text-search query doesn't contain lexemes: ":"
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand new smartphone');
+     websearch_to_tsquery      
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand "new smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand "new -smartphone"');
+      websearch_to_tsquery       
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand:B "new -smartphone"');
+         websearch_to_tsquery          
+---------------------------------------
+ 'brand' & 'b' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat or rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR');
+ websearch_to_tsquery 
+----------------------
+ 'cat' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'OR rat');
+ websearch_to_tsquery 
+----------------------
+ 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+ websearch_to_tsquery  
+-----------------------
+ 'fat' & 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat*rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat-rat');
+   websearch_to_tsquery    
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat_rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
+       websearch_to_tsquery        
+-----------------------------------
+ 'fat' <-> 'cat' | 'fat' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or(rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or)rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or&rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or|rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or!rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or<rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or>rat');
+ websearch_to_tsquery 
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or ');
+ websearch_to_tsquery 
+----------------------
+ 'fat'
+(1 row)
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orange'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc orтест');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'orтест'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR1234');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or1234'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or-abc');
+      websearch_to_tsquery       
+---------------------------------
+ 'abc' & 'or-abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR_abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'or OR or');
+ websearch_to_tsquery 
+----------------------
+ 'or' | 'or'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+       websearch_to_tsquery        
+-----------------------------------
+ 'fat' <-> 'cat' & 'eaten' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+        websearch_to_tsquery        
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', 'this is ----fine');
+ websearch_to_tsquery 
+----------------------
+ !!!!'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
+          websearch_to_tsquery          
+----------------------------------------
+ !'fine' & 'dear' <-> 'friend' | 'good'
+(1 row)
+
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+  websearch_to_tsquery  
+------------------------
+ 'old' & 'cat' & 'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+         websearch_to_tsquery         
+--------------------------------------
+ 'толст' <-> 'кошк' & 'съел' & 'крыс'
+(1 row)
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ t
+(1 row)
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+ ?column? 
+----------
+ f
+(1 row)
+
+-- cases handled by gettoken_tsvector()
+select websearch_to_tsquery('''');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
+select websearch_to_tsquery('''abc''''def''');
+ websearch_to_tsquery 
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('\abc');
+ websearch_to_tsquery 
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('\');
+NOTICE:  text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery 
+----------------------
+ 
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1c8520b3e9..da8d089100 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -539,3 +539,75 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
 set enable_seqscan = on;
+
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
+select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
+select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
+select websearch_to_tsquery('simple', 'fat:A : cat:B');
+
+select websearch_to_tsquery('simple', 'abc : def');
+select websearch_to_tsquery('simple', 'abc:def');
+select websearch_to_tsquery('simple', 'a:::b');
+select websearch_to_tsquery('simple', ':');
+
+select websearch_to_tsquery('english', 'My brand new smartphone');
+select websearch_to_tsquery('english', 'My brand "new smartphone"');
+select websearch_to_tsquery('english', 'My brand "new -smartphone"');
+select websearch_to_tsquery('english', 'My brand:B "new -smartphone"');
+
+select websearch_to_tsquery('simple', 'cat or rat');
+select websearch_to_tsquery('simple', 'cat OR rat');
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+select websearch_to_tsquery('simple', 'cat OR');
+select websearch_to_tsquery('simple', 'OR rat');
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+select websearch_to_tsquery('simple', 'fat*rat');
+select websearch_to_tsquery('simple', 'fat-rat');
+select websearch_to_tsquery('simple', 'fat_rat');
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
+select websearch_to_tsquery('simple', 'fat or(rat');
+select websearch_to_tsquery('simple', 'fat or)rat');
+select websearch_to_tsquery('simple', 'fat or&rat');
+select websearch_to_tsquery('simple', 'fat or|rat');
+select websearch_to_tsquery('simple', 'fat or!rat');
+select websearch_to_tsquery('simple', 'fat or<rat');
+select websearch_to_tsquery('simple', 'fat or>rat');
+select websearch_to_tsquery('simple', 'fat or ');
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+select websearch_to_tsquery('simple', 'abc orтест');
+select websearch_to_tsquery('simple', 'abc OR1234');
+select websearch_to_tsquery('simple', 'abc or-abc');
+select websearch_to_tsquery('simple', 'abc OR_abc');
+select websearch_to_tsquery('simple', 'abc or');
+
+select websearch_to_tsquery('simple', 'or OR or');
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+
+select websearch_to_tsquery('english', 'this is ----fine');
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+select websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+select to_tsvector('russian', 'съела толстая серая кошка крысу') @@
+websearch_to_tsquery('russian', '"толстая кошка" съела крысу');
+
+-- cases handled by gettoken_tsvector()
+select websearch_to_tsquery('''');
+select websearch_to_tsquery('''abc''''def''');
+select websearch_to_tsquery('\abc');
+select websearch_to_tsquery('\');

Reply via email to