Hello, Dmitry

This is my initial review for you patch. Below are my comments.

Introduction
------------

This patch introduce new operator and new functions.
New operator:
- ??
New functions:
- phraseto_tsquery([ config regconfig , ] query text)
- setweight(tsquery, "char")
- tsquery_phrase(query1 tsquery, query2 tsquery)
- tsquery_phrase(query1 tsquery, query2 tsquery, distance integer)

New regression tests are included in the patch. Information about new features is provided in the documentation.

Initial Run
-----------

The patch applies correctly to HEAD. Regression tests pass successfully, without errors. It seems that the patch work as you expected.

Performance
-----------

I have not tested possible performance issues yet. Maybe later I will prepare some data to test performance.

Coding
------

I agree with others that there is a lack of comments for new functions. Most of them without comments.

../pg_patches/phrase_search.patch:1086: trailing whitespace.
         * QI_VALSTOP nodes should be cleaned and
../pg_patches/phrase_search.patch:1124: trailing whitespace.
                if (priority < parentPriority)
../pg_patches/phrase_search.patch:1140: trailing whitespace.
                if (priority < parentPriority)
../pg_patches/phrase_search.patch:1591: space before tab in indent.
                                        /*      (a|b) ? c => (a?c) | (b?c) */
../pg_patches/phrase_search.patch:1603: space before tab in indent.
                                        /*      !a ? b    => b & !(a?b) */

It is git apply output. There are trailing whitespaces in the code.

+typedef struct MorphOpaque
+{
+       Oid             cfg_id;
+       int             qoperator;      /* query operator */
+} MorphOpaque;

Maybe you should move structure definition to the top of the to_tsany.c

+                                       pushValue(state,
+                                                         prs.words[count].word,
+                                                         prs.words[count].len,
+                                                         weight,
+                                                         ((prs.words[count].flags 
& TSL_PREFIX) || prefix) ?
+                                                                       true :
+                                                                       false);

There is not need in ternary operator here. You can write:

pushValue(state,
                  prs.words[count].word,
                  prs.words[count].len,
                  weight,
                  (prs.words[count].flags & TSL_PREFIX) || prefix);

 /*
+ * Wrapper of check condition function for TS_execute.
+ * We are using the fact GIN_FALSE = 0 and GIN_MAYBE state
+ * should be treated as true, so, any non-zero value should be
+ * converted to boolean true.
+ */
+static bool
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+       return !!checkcondition_gin_internal((GinChkVal *) checkval, val, data);
+}

Maybe write like this?

static bool
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
{
return checkcondition_gin_internal((GinChkVal *) checkval, val, data) != GIN_FALSE;
}

Then you don't need in the comment above of the function.

+#define PRINT_PRIORITY(x)      ( (((QueryOperator*)(x))->oper == OP_NOT) ? 5 : 
QO_PRIORITY(x) )

What is mean "5" here? You can define a new value near the:

#define OP_OR           1
#define OP_AND          2
#define OP_NOT          3
#define OP_PHRASE       4

+Datum
+tsquery_setweight(PG_FUNCTION_ARGS)
+{
+       TSQuery         in = PG_GETARG_TSQUERY(0);
+       char        cw = PG_GETARG_CHAR(1);
+       TSQuery         out;
+       QueryItem  *item;
+       int         w = 0;
+
+       switch (cw)
+       {
+               case 'A':
+               case 'a':
+                       w = 3;
+                       break;
+               case 'B':
+               case 'b':
+                       w = 2;
+                       break;
+               case 'C':
+               case 'c':
+                       w = 1;
+                       break;
+               case 'D':
+               case 'd':
+                       w = 0;
+                       break;
+               default:
+                       /* internal error */
+                       elog(ERROR, "unrecognized weight: %d", cw);
+       }
+
+       out = (TSQuery) palloc(VARSIZE(in));
+       memcpy(out, in, VARSIZE(in));
+       item = GETQUERY(out);
+
+       while(item - GETQUERY(out) < out->size)   
+       {
+               if (item->type == QI_VAL)
+                       item->qoperand.weight |= (1 << w);
+
+               item++;
+       }
+
+       PG_FREE_IF_COPY(in, 0);
+       PG_RETURN_POINTER(out);
+}

This function has the duplicated piece from the tsvector_setweight() from tsvector_op.c. You can define new function for it.

+/*
+ * Check if datatype is the specified type or equivalent to it.
+ *
+ * Note: we could just do getBaseType() unconditionally, but since that's
+ * a relatively expensive catalog lookup that most users won't need, we
+ * try the straight comparison first.
+ */
+static bool
+is_expected_type(Oid typid, Oid expected_type)
+{
+       if (typid == expected_type)
+               return true;
+       typid = getBaseType(typid);
+       if (typid == expected_type)
+               return true;
+       return false;
+}
+
+/* Check if datatype is TEXT or binary-equivalent to it */
+static bool
+is_text_type(Oid typid)
+{
+       /* varchar(n) and char(n) are binary-compatible with text */
+       if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
+               return true;
+       /* Allow domains over these types, too */
+       typid = getBaseType(typid);
+       if (typid == TEXTOID || typid == VARCHAROID || typid == BPCHAROID)
+               return true;
+       return false;
+}

These functions was removed in 9acb9007de30b3daaa9efc16763c3bc6e3e0a92d. It seems that tsvector_op.c was not synchronized with the master.

Conclusion
----------

This patch is large and it needs more research. I will be reviewing it and will give another notes later.

--
Artur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to