On 2017-09-09 06:03, Thomas Munro wrote:
Please send a rebased version of the patch for people to review and
test as that one has bit-rotted.
Hello,
Thank you for your interest. Attached is a rebased version of the patch
(based on commit 69835bc8988812c960f4ed5aeee86b62ac73602a).
--
Victor Drobny
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 641b3b8..a694801 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9523,6 +9523,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<row>
<entry>
<indexterm>
+ <primary>queryto_tsquery</primary>
+ </indexterm>
+ <literal><function>queryto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
+ </entry>
+ <entry><type>tsquery</type></entry>
+ <entry>produce <type>tsquery</> from a Google-like query</entry>
+ <entry><literal>queryto_tsquery('english', 'The Fat Rats')</literal></entry>
+ <entry><literal>'fat' & 'rat'</literal></entry>
+ </row>
+ <row>
+ <entry>
+ <indexterm>
<primary>querytree</primary>
</indexterm>
<literal><function>querytree(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>)</function></literal>
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index fe630a6..999e4ad 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -797,13 +797,15 @@ UPDATE tt SET ti =
<para>
<productname>PostgreSQL</productname> provides the
functions <function>to_tsquery</function>,
- <function>plainto_tsquery</function>, and
- <function>phraseto_tsquery</function>
+ <function>plainto_tsquery</function>,
+ <function>phraseto_tsquery</function> and
+ <function>queryto_tsquery</function>
for converting a query to the <type>tsquery</type> data type.
<function>to_tsquery</function> offers access to more features
than either <function>plainto_tsquery</function> or
<function>phraseto_tsquery</function>, but it is less forgiving
- about its input.
+ about its input. <function>queryto_tsquery</function> provides a
+ different, Google-like syntax for creating a <type>tsquery</type>.
</para>
<indexterm>
@@ -960,8 +962,68 @@ SELECT phraseto_tsquery('english', 'The Fat & Rats:C');
-----------------------------
'fat' <-> 'rat' <-> 'c'
</screen>
+</para>
+
+<synopsis>
+queryto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</>
+</synopsis>
+
+ <para>
+ <function>queryto_tsquery</> creates a <type>tsquery</type> from unformatted text.
+ Unlike <function>plainto_tsquery</> and <function>phraseto_tsquery</>, it does not
+ ignore operators already present in the input. This function supports the following operators:
+ <itemizedlist spacing="compact" mark="bullet">
+ <listitem>
+ <para>
+ '"some text"' - any text inside double quotes is treated as a phrase and is
+ processed as in <function>phraseto_tsquery</>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ 'OR' - standard logical operator. It is an alias for the '|' sign.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ 'terma AROUND(N) termb' - this operation matches if the distance between
+ terma and termb is no greater than N.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ '-' - standard logical negation sign. It is an alias for the '!' sign.
+ </para>
+ </listitem>
+ </itemizedlist>
+ Where no operator is given between terms, AND is inserted, as in <function>plainto_tsquery</>.
</para>
+ <para>
+ Examples:
+ <screen>
+ select queryto_tsquery('The fat rats');
+ queryto_tsquery
+ -----------------
+ 'fat' & 'rat'
+ (1 row)
+ </screen>
+ <screen>
+ select queryto_tsquery('"supernovae stars" AND -crab');
+ queryto_tsquery
+ ----------------------------------
+ 'supernova' <-> 'star' & !'crab'
+ (1 row)
+ </screen>
+ <screen>
+ select queryto_tsquery('-run AROUND(5) "gnu debugger" OR "I like bananas"');
+ queryto_tsquery
+ -----------------------------------------------------------
+ !'run' AROUND(5) 'gnu' <-> 'debugg' | 'like' <-> 'banana'
+ (1 row)
+ </screen>
+ </para>
+
</sect2>
<sect2 id="textsearch-ranking">
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 35d9ab2..e820042 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -390,7 +390,8 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
* and different variants are ORed together.
*/
static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight, bool prefix, bool isphrase)
{
int32 count = 0;
ParsedText prs;
@@ -423,7 +424,12 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
/* put placeholders for each missing stop word */
pushStop(state);
if (cntpos)
- pushOperator(state, data->qoperator, 1);
+ {
+ if (isphrase)
+ pushOperator(state, OP_PHRASE, 1);
+ else
+ pushOperator(state, data->qoperator, 1);
+ }
cntpos++;
pos++;
}
@@ -464,7 +470,10 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
if (cntpos)
{
/* distance may be useful */
- pushOperator(state, data->qoperator, 1);
+ if (isphrase)
+ pushOperator(state, OP_PHRASE, 1);
+ else
+ pushOperator(state, data->qoperator, 1);
}
cntpos++;
@@ -490,6 +499,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
+ false,
false);
PG_RETURN_TSQUERY(query);
@@ -520,7 +530,8 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ true,
+ false);
PG_RETURN_POINTER(query);
}
@@ -551,7 +562,8 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ true,
+ false);
PG_RETURN_TSQUERY(query);
}
@@ -567,3 +579,36 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
ObjectIdGetDatum(cfgId),
PointerGetDatum(in)));
}
+
+/*
+ * Build a tsquery from text written in the extended ("Google-like") syntax.
+ *
+ * Argument 0 is the text search configuration OID, argument 1 the query text.
+ * qoperator = OP_AND is what pushval_morph puts between the lexemes of one
+ * token when that token is not part of a quoted phrase.
+ */
+Datum
+queryto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(1);
+	MorphOpaque morph;
+
+	morph.cfg_id = PG_GETARG_OID(0);
+	morph.qoperator = OP_AND;
+	/* isplain = false, isquery = true: enable the extended query syntax */
+	PG_RETURN_TSQUERY(parse_tsquery(text_to_cstring(querytext), pushval_morph,
+									PointerGetDatum(&morph), false, true));
+}
+
+/*
+ * One-argument form: run queryto_tsquery_byid with getTSCurrentConfig().
+ */
+Datum
+queryto_tsquery(PG_FUNCTION_ARGS)
+{
+	text	   *querytext = PG_GETARG_TEXT_PP(0);
+	Oid			currentCfg = getTSCurrentConfig(true);
+
+	PG_RETURN_DATUM(DirectFunctionCall2(queryto_tsquery_byid, ObjectIdGetDatum(currentCfg),
+										PointerGetDatum(querytext)));
+}
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
index 046f543..375cb5c 100644
--- a/src/backend/tsearch/ts_selfuncs.c
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -396,6 +396,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
break;
case OP_PHRASE:
+ case OP_AROUND:
case OP_AND:
s1 = tsquery_opr_selec(item + 1, operand,
lookup, length, minfreq);
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 83a939d..7dc8f36 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -239,6 +239,7 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
return !result;
case OP_PHRASE:
+ case OP_AROUND:
/*
* GIN doesn't contain any information about positions, so treat
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index fdb0419..c238055 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -29,6 +29,7 @@ const int tsearch_op_priority[OP_COUNT] =
4, /* OP_NOT */
2, /* OP_AND */
1, /* OP_OR */
+ 3, /* OP_AROUND */
3 /* OP_PHRASE */
};
@@ -58,10 +59,11 @@ struct TSQueryParserStateData
};
/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR 2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
+#define WAITOPERAND 1
+#define WAITOPERATOR 2
+#define WAITFIRSTOPERAND 3
+#define WAITSINGLEOPERAND 4
+#define INSIDEQUOTES 5
/*
* subroutine to parse the modifiers (weight and prefix flag currently)
@@ -210,6 +212,69 @@ typedef enum
PT_CLOSE = 5
} ts_tokentype;
+
+/*
+ * Return true if the NUL-terminated string str begins with prefix.  An empty
+ * prefix always matches.
+ *
+ * strncmp() stops at the first mismatch or at str's terminating NUL, so a
+ * str shorter than prefix is rejected without scanning all of str.  That
+ * matters because callers hand in the unparsed remainder of the query text,
+ * which can be far longer than the keyword being tested.
+ */
+static bool
+has_prefix(const char *str, const char *prefix)
+{
+	size_t		prefixlen = strlen(prefix);
+
+	return strncmp(str, prefix, prefixlen) == 0;
+}
+
+/*
+ * Parse an AROUND operator of the form "a AROUND(X) b", which matches when
+ * the distance between a and b is no greater than X.
+ *
+ * buf must begin with "AROUND("; returns a pointer just past the closing ')'.
+ * A negative X is stored as *distance = -1 so the caller can reject it.
+ */
+static char *
+parse_around_operator(char *buf, int16 *distance)
+{
+	char	   *ptr = buf + strlen("AROUND(");
+	char	   *endptr;
+	long		l;
+
+	Assert(has_prefix(buf, "AROUND("));
+
+	while (t_isspace(ptr))
+		ptr++;
+
+	/* strtol signals overflow only through errno, so clear any stale value */
+	errno = 0;
+	l = strtol(ptr, &endptr, 10);
+	if (ptr == endptr)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Invalid AROUND(X) operator!")));
+	else if (errno == ERANGE || l > MAXENTRYPOS)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("distance in AROUND operator should not be greater than %d",
+						MAXENTRYPOS)));
+
+	/* clamp negatives: a bare int16 cast could wrap them into a valid distance */
+	*distance = (l < 0) ? -1 : (int16) l;
+	ptr = endptr;
+	while (t_isspace(ptr))
+		ptr++;
+
+	if (!t_iseq(ptr, ')'))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Missing ')' in AROUND(X) operator")));
+
+	return ++ptr;
+}
/*
* get token from query string
*
@@ -221,7 +286,8 @@ typedef enum
static ts_tokentype
gettoken_query(TSQueryParserState state,
int8 *operator,
- int *lenval, char **strval, int16 *weight, bool *prefix)
+ int *lenval, char **strval, int16 *weight, bool *prefix,
+ bool isquery)
{
*weight = 0;
*prefix = false;
@@ -232,7 +298,7 @@ gettoken_query(TSQueryParserState state,
{
case WAITFIRSTOPERAND:
case WAITOPERAND:
- if (t_iseq(state->buf, '!'))
+ if (t_iseq(state->buf, '!') || (isquery && t_iseq(state->buf, '-')))
{
(state->buf)++; /* can safely ++, t_iseq guarantee that
* pg_mblen()==1 */
@@ -254,6 +320,20 @@ gettoken_query(TSQueryParserState state,
errmsg("syntax error in tsquery: \"%s\"",
state->buffer)));
}
+ else if (isquery && t_iseq(state->buf, '"'))
+ {
+ char *quote = strchr(state->buf + 1, '"');
+ if (quote == NULL)
+ {
+ state->buf++;
+ continue;
+ }
+ *strval = state->buf + 1;
+ *lenval = quote - state->buf - 1;
+ state->buf = quote + 1;
+ state->state = INSIDEQUOTES;
+ return PT_VAL;
+ }
else if (!t_isspace(state->buf))
{
/*
@@ -291,6 +371,13 @@ gettoken_query(TSQueryParserState state,
(state->buf)++;
return PT_OPR;
}
+ else if (isquery && has_prefix(state->buf, "OR "))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ (state->buf) += 3;
+ return PT_OPR;
+ }
else if (t_iseq(state->buf, '<'))
{
state->state = WAITOPERAND;
@@ -301,14 +388,39 @@ gettoken_query(TSQueryParserState state,
return PT_ERR;
return PT_OPR;
}
+ else if (isquery && has_prefix(state->buf, "AROUND("))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_AROUND;
+ /* weight var is used as storage for distance */
+ state->buf = parse_around_operator(state->buf, weight);
+ if (*weight < 0)
+ return PT_ERR;
+ return PT_OPR;
+ }
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
+ else if (t_iseq(state->buf, '('))
+ {
+ *operator = OP_AND;
+ state->state = WAITOPERAND;
+ return PT_OPR;
+ }
else if (*(state->buf) == '\0')
return (state->count) ? PT_ERR : PT_END;
+ else if (isquery &&
+ (t_isalpha(state->buf) || t_iseq(state->buf, '!')
+ || t_iseq(state->buf, '-')
+ || t_iseq(state->buf, '"')))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
else if (!t_isspace(state->buf))
return PT_ERR;
break;
@@ -320,6 +432,9 @@ gettoken_query(TSQueryParserState state,
state->buf += strlen(state->buf);
state->count++;
return PT_VAL;
+ case INSIDEQUOTES:
+ state->state = WAITOPERATOR;
+ continue;
default:
return PT_ERR;
break;
@@ -336,12 +451,12 @@ pushOperator(TSQueryParserState state, int8 oper, int16 distance)
{
QueryOperator *tmp;
- Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE || oper == OP_AROUND);
tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
tmp->type = QI_OPR;
tmp->oper = oper;
- tmp->distance = (oper == OP_PHRASE) ? distance : 0;
+ tmp->distance = (oper == OP_PHRASE || oper == OP_AROUND) ? distance : 0;
/* left is filled in later with findoprnd */
state->polstr = lcons(tmp, state->polstr);
@@ -475,7 +590,8 @@ cleanOpStack(TSQueryParserState state,
static void
makepol(TSQueryParserState state,
PushFunction pushval,
- Datum opaque)
+ Datum opaque,
+ bool isquery)
{
int8 operator = 0;
ts_tokentype type;
@@ -489,19 +605,19 @@ makepol(TSQueryParserState state,
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
+ while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix, isquery)) != PT_END)
{
switch (type)
{
case PT_VAL:
- pushval(opaque, state, strval, lenval, weight, prefix);
+ pushval(opaque, state, strval, lenval, weight, prefix, state->state == INSIDEQUOTES);
break;
case PT_OPR:
cleanOpStack(state, opstack, &lenstack, operator);
pushOpStack(opstack, &lenstack, operator, weight);
break;
case PT_OPEN:
- makepol(state, pushval, opaque);
+ makepol(state, pushval, opaque, isquery);
break;
case PT_CLOSE:
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
@@ -555,7 +671,8 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
Assert(curitem->oper == OP_AND ||
curitem->oper == OP_OR ||
- curitem->oper == OP_PHRASE);
+ curitem->oper == OP_PHRASE ||
+ curitem->oper == OP_AROUND);
(*pos)++;
@@ -605,7 +722,8 @@ TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
- bool isplain)
+ bool isplain,
+ bool isquery)
{
struct TSQueryParserStateData state;
int i;
@@ -632,7 +750,7 @@ parse_tsquery(char *buf,
*(state.curop) = '\0';
/* parse query & make polish notation (postfix, but in reverse order) */
- makepol(&state, pushval, opaque);
+ makepol(&state, pushval, opaque, isquery);
close_tsvector_parser(state.valstate);
@@ -703,7 +821,7 @@ parse_tsquery(char *buf,
static void
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
- int16 weight, bool prefix)
+ int16 weight, bool prefix, bool isphrase)
{
pushValue(state, strval, lenval, weight, prefix);
}
@@ -716,7 +834,7 @@ tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false, false));
}
/*
@@ -884,6 +1002,9 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp)
else
sprintf(in->cur, " <-> %s", nrm.buf);
break;
+ case OP_AROUND:
+ sprintf(in->cur, " AROUND(%d) %s", distance, nrm.buf);
+ break;
default:
/* OP_NOT is handled in above if-branch */
elog(ERROR, "unrecognized operator type: %d", op);
@@ -966,7 +1087,7 @@ tsquerysend(PG_FUNCTION_ARGS)
break;
case QI_OPR:
pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
- if (item->qoperator.oper == OP_PHRASE)
+ if (item->qoperator.oper == OP_PHRASE || item->qoperator.oper == OP_AROUND)
pq_sendint(&buf, item->qoperator.distance,
sizeof(item->qoperator.distance));
break;
@@ -1063,14 +1184,14 @@ tsqueryrecv(PG_FUNCTION_ARGS)
int8 oper;
oper = (int8) pq_getmsgint(buf, sizeof(int8));
- if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
+ if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE && oper != OP_AROUND)
elog(ERROR, "invalid tsquery: unrecognized operator type %d",
(int) oper);
if (i == size - 1)
elog(ERROR, "invalid pointer to right operand");
item->qoperator.oper = oper;
- if (oper == OP_PHRASE)
+ if (oper == OP_PHRASE || oper == OP_AROUND)
item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
}
else
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
index 350171c..071bfa0 100644
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -161,7 +161,8 @@ clean_NOT_intree(NODE *node)
NODE *res = node;
Assert(node->valnode->qoperator.oper == OP_AND ||
- node->valnode->qoperator.oper == OP_PHRASE);
+ node->valnode->qoperator.oper == OP_PHRASE ||
+ node->valnode->qoperator.oper == OP_AROUND);
node->left = clean_NOT_intree(node->left);
node->right = clean_NOT_intree(node->right);
@@ -277,7 +278,8 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd)
node->right = clean_stopword_intree(node->right, &rladd, &rradd);
/* Check if current node is OP_PHRASE, get its distance */
- isphrase = (node->valnode->qoperator.oper == OP_PHRASE);
+ isphrase = (node->valnode->qoperator.oper == OP_PHRASE
+ || node->valnode->qoperator.oper == OP_AROUND);
ndistance = isphrase ? node->valnode->qoperator.distance : 0;
if (node->left == NULL && node->right == NULL)
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index 755c3e9..7cf9b8a 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -37,7 +37,7 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
res->valnode->type = QI_OPR;
res->valnode->qoperator.oper = operator;
- if (operator == OP_PHRASE)
+ if (operator == OP_PHRASE || operator == OP_AROUND)
res->valnode->qoperator.distance = distance;
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
index 971bb81..548a846 100644
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -121,7 +121,7 @@ QTNodeCompare(QTNode *an, QTNode *bn)
return res;
}
- if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
+ if ((ao->oper == OP_PHRASE || ao->oper == OP_AROUND) && ao->distance != bo->distance)
return (ao->distance > bo->distance) ? -1 : 1;
return 0;
@@ -171,7 +171,8 @@ QTNSort(QTNode *in)
for (i = 0; i < in->nchild; i++)
QTNSort(in->child[i]);
- if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
+ if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE
+ && in->valnode->qoperator.oper != OP_AROUND)
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 4577bcc..b62b14d 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -366,7 +366,8 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
/* XXX: What about NOT? */
res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
- item->qoperator.oper == OP_PHRASE)) ?
+ item->qoperator.oper == OP_PHRASE ||
+ item->qoperator.oper == OP_AROUND)) ?
calc_rank_and(w, t, q) :
calc_rank_or(w, t, q);
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 8225202..92d267d 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1429,6 +1429,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
+#define TS_NOT_EXAC 0x08 /* not exact distance for AROUND(X) */
static bool
TS_phrase_output(ExecPhraseData *data,
@@ -1473,8 +1474,18 @@ TS_phrase_output(ExecPhraseData *data,
Rpos = INT_MAX;
}
+ /* Processing OP_AROUND */
+ if ((emit & TS_NOT_EXAC) &&
+ Lpos - Rpos >= 0 &&
+ Lpos - Rpos <= (Loffset + Roffset) * 2 - Rdata->width + Ldata->width)
+ {
+ if (emit & TSPO_BOTH)
+ output_pos = Rpos;
+ Lindex++;
+ Rindex++;
+ }
/* Merge-join the two input lists */
- if (Lpos < Rpos)
+ else if (Lpos < Rpos)
{
/* Lpos is not matched in Rdata, should we output it? */
if (emit & TSPO_L_ONLY)
@@ -1625,6 +1636,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
}
case OP_PHRASE:
+ case OP_AROUND:
case OP_AND:
memset(&Ldata, 0, sizeof(Ldata));
memset(&Rdata, 0, sizeof(Rdata));
@@ -1647,7 +1659,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
(Rdata.npos == 0 && !Rdata.negate))
return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
- if (curitem->qoperator.oper == OP_PHRASE)
+ if (curitem->qoperator.oper == OP_PHRASE || curitem->qoperator.oper == OP_AROUND)
{
/*
* Compute Loffset and Roffset suitable for phrase match, and
@@ -1703,7 +1715,7 @@ TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
{
/* straight AND */
return TS_phrase_output(data, &Ldata, &Rdata,
- TSPO_BOTH,
+ TSPO_BOTH | (curitem->qoperator.oper == OP_AROUND ? TS_NOT_EXAC : 0),
Loffset, Roffset,
Min(Ldata.npos, Rdata.npos));
}
@@ -1843,6 +1855,7 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags,
return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE:
+ case OP_AROUND:
return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
default:
@@ -1882,6 +1895,7 @@ tsquery_requires_match(QueryItem *curitem)
return false;
case OP_PHRASE:
+ case OP_AROUND:
/*
* Treat OP_PHRASE as OP_AND here
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index d820b56..79d0c43 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4907,6 +4907,8 @@ DATA(insert OID = 3746 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2
DESCR("make tsquery");
DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8889 ( queryto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ queryto_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
@@ -4915,6 +4917,8 @@ DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1
DESCR("make tsquery");
DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8890 ( queryto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ queryto_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 30d7c4b..2f4c374 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -166,8 +166,9 @@ typedef struct
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
+#define OP_AROUND 5
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
-#define OP_COUNT 4
+#define OP_COUNT 5
extern const int tsearch_op_priority[OP_COUNT];
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 3312353..034f36c 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -44,11 +44,12 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
char *token, int tokenlen,
int16 tokenweights, /* bitmap as described in
* QueryOperand struct */
- bool prefix);
+ bool prefix,
+ bool isphrase);
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
- Datum opaque, bool isplain);
+ Datum opaque, bool isplain, bool isquery);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index b2fc9e2..93db7b8 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1661,3 +1661,115 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
(1 row)
set enable_seqscan = on;
+--test queryto_tsquery function
+select queryto_tsquery('My brand new smartphone');
+ queryto_tsquery
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select queryto_tsquery('My brand "new smartphone"');
+ queryto_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select queryto_tsquery('"A fat cat" has just eaten a -rat.');
+ queryto_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select queryto_tsquery('"A fat cat" has just eaten OR -rat.');
+ queryto_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select queryto_tsquery('"A fat cat" has just (eaten OR -rat)');
+ queryto_tsquery
+----------------------------------------
+ 'fat' <-> 'cat' & ( 'eaten' | !'rat' )
+(1 row)
+
+-- testing AROUND operator evaluation
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"gnu debugger" AROUND(5) runs');
+ ?column?
+----------
+ f
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('run AROUND(5) "gnu debugger"');
+ ?column?
+----------
+ f
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"gnu debugger" AROUND(6) runs');
+ ?column?
+----------
+ t
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('run AROUND(6) "gnu debugger"');
+ ?column?
+----------
+ t
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"many programming languages" AROUND(10) "portable debugger"');
+ ?column?
+----------
+ f
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"portable debugger" AROUND(10) "many programming languages"');
+ ?column?
+----------
+ f
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"many programming languages" AROUND(11) "portable debugger"');
+ ?column?
+----------
+ t
+(1 row)
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"portable debugger" AROUND(11) "many programming languages"');
+ ?column?
+----------
+ t
+(1 row)
+
+select queryto_tsquery('"fat cat AROUND(5) rat"');
+ queryto_tsquery
+------------------------------------------------
+ 'fat' <-> 'cat' <-> 'around' <-> '5' <-> 'rat'
+(1 row)
+
+select queryto_tsquery('simple','"fat cat OR rat"');
+ queryto_tsquery
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select queryto_tsquery('fat*rat');
+ queryto_tsquery
+-----------------
+ 'fat' & 'rat'
+(1 row)
+
+select queryto_tsquery('fat-rat');
+ queryto_tsquery
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index e4b21f8..12da75a 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -535,3 +535,34 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
set enable_seqscan = on;
+
+--test queryto_tsquery function
+select queryto_tsquery('My brand new smartphone');
+select queryto_tsquery('My brand "new smartphone"');
+select queryto_tsquery('"A fat cat" has just eaten a -rat.');
+select queryto_tsquery('"A fat cat" has just eaten OR -rat.');
+select queryto_tsquery('"A fat cat" has just (eaten OR -rat)');
+
+-- testing AROUND operator evaluation
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"gnu debugger" AROUND(5) runs');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('run AROUND(5) "gnu debugger"');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"gnu debugger" AROUND(6) runs');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('run AROUND(6) "gnu debugger"');
+
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"many programming languages" AROUND(10) "portable debugger"');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"portable debugger" AROUND(10) "many programming languages"');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"many programming languages" AROUND(11) "portable debugger"');
+select to_tsvector('The GNU Debugger is a portable debugger that runs on many Unix like systems and works for many programming languages') @@
+queryto_tsquery('"portable debugger" AROUND(11) "many programming languages"');
+
+select queryto_tsquery('"fat cat AROUND(5) rat"');
+select queryto_tsquery('simple','"fat cat OR rat"');
+select queryto_tsquery('fat*rat');
+select queryto_tsquery('fat-rat');
\ No newline at end of file
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers