Hello, Dmitry
This is my initial review for you patch. Below are my comments.
Introduction
------------
This patch introduce new operator and new functions.
New operator:
- ??
New functions:
- phraseto_tsquery([ config regconfig , ] query text)
- setweight(tsquery, "char")
- tsquery_phrase(query1 tsquery, query2 tsquery)
- tsquery_phrase(query1 tsquery, query2 tsquery, distance integer)
New regression tests are included in the patch. Information about new
features is provided in the documentation.
Initial Run
-----------
The patch applies correctly to HEAD. Regression tests pass successfully,
without errors. It seems that the patch work as you expected.
Performance
-----------
I have not tested possible performance issues yet. Maybe later I will
prepare some data to test performance.
Coding
------
I agree with others that there is a lack of comments for new functions.
Most of them without comments.
../pg_patches/phrase_search.patch:1086: trailing whitespace.
* QI_VALSTOP nodes should be cleaned and
../pg_patches/phrase_search.patch:1124: trailing whitespace.
if (priority < parentPriority)
../pg_patches/phrase_search.patch:1140: trailing whitespace.
if (priority < parentPriority)
../pg_patches/phrase_search.patch:1591: space before tab in indent.
/* (a|b) ? c => (a?c) | (b?c) */
../pg_patches/phrase_search.patch:1603: space before tab in indent.
/* !a ? b => b & !(a?b) */
It is git apply output. There are trailing whitespaces in the code.
+typedef struct MorphOpaque
+{
+ Oid cfg_id;
+ int qoperator; /* query operator */
+} MorphOpaque;
Maybe you should move structure definition to the top of the to_tsany.c
+ pushValue(state,
+ prs.words[count].word,
+ prs.words[count].len,
+ weight,
+ ((prs.words[count].flags
& TSL_PREFIX) || prefix) ?
+ true :
+ false);
There is not need in ternary operator here. You can write:
pushValue(state,
prs.words[count].word,
prs.words[count].len,
weight,
(prs.words[count].flags & TSL_PREFIX) || prefix);
/*
+ * Wrapper of check condition function for TS_execute.
+ * We are using the fact GIN_FALSE = 0 and GIN_MAYBE state
+ * should be treated as true, so, any non-zero value should be
+ * converted to boolean true.
+ */
+static bool
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ return !!checkcondition_gin_internal((GinChkVal *) checkval, val, data);
+}
Maybe write like this?
static bool
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
return checkcondition_gin_internal((GinChkVal *) checkval, val, data)
!= GIN_FALSE;
}
Then you don't need in the comment above of the function.
+#define PRINT_PRIORITY(x) ( (((QueryOperator*)(x))->oper == OP_NOT) ? 5 :
QO_PRIORITY(x) )
What is mean "5" here? You can define a new value near the:
#define OP_OR 1
#define OP_AND 2
#define OP_NOT 3
#define OP_PHRASE 4
+Datum
+tsquery_setweight(PG_FUNCTION_ARGS)
+{
+ TSQuery in = PG_GETARG_TSQUERY(0);
+ char cw = PG_GETARG_CHAR(1);
+ TSQuery out;
+ QueryItem *item;
+ int w = 0;
+
+ switch (cw)
+ {
+ case 'A':
+ case 'a':
+ w = 3;
+ break;
+ case 'B':
+ case 'b':
+ w = 2;
+ break;
+ case 'C':
+ case 'c':
+ w = 1;
+ break;
+ case 'D':
+ case 'd':
+ w = 0;
+ break;
+ default:
+ /* internal error */
+ elog(ERROR, "unrecognized weight: %d", cw);
+ }
+
+ out = (TSQuery) palloc(VARSIZE(in));
+ memcpy(out, in, VARSIZE(in));
+ item = GETQUERY(out);
+
+ while(item - GETQUERY(out) < out->size)
+ {
+ if (item->type == QI_VAL)
+ item->qoperand.weight |= (1 << w);
+
+ item++;
+ }
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_POINTER(out);
+}
This function has the duplicated piece from the tsvector_setweight()
from tsvector_op.c. You can define new function for it.
+/*
+ * Check if datatype is the specified type or equivalent to it.
+ *
+ * Note: we could just do getBaseType() unconditionally, but since that's
+ * a relatively expensive catalog lookup that most users won't need, we
+ * try the straight comparison first.
+ */
+static bool
+is_expected_type(Oid typid, Oid expected_type)
+{
+ if (typid == expected_type)
+ return true;
+ typid = getBaseType(typid);
+ if (typid == expected_type)
+ return true;
+ return false;
+}
+
+/* Check if datatype is TEXT or binary-equivalent to it */
+static bool
+is_text_type(Oid typid)
+{
+ /* varchar(n) and char(n) are binary-compatible with text */
+ if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
+ return true;
+ /* Allow domains over these types, too */
+ typid = getBaseType(typid);
+ if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
+ return true;
+ return false;
+}
These functions was removed in 9acb9007de30b3daaa9efc16763c3bc6e3e0a92d.
It seems that tsvector_op.c was not synchronized with the master.
Conclusion
----------
This patch is large and it needs more research. I will be reviewing it
and will give another notes later.
--
Artur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers