On Wed, Jul 29, 2009 at 6:59 PM, Andres Freund<[email protected]> wrote:
> Looks nice. The only small gripe I have is that the patch adds trailing
> whitespaces at a lot of places...
>
> Except maybe that I do see no need for changes anymore...
I have fixed this for Sergey in the attached version using "git apply
--whitespace=fix". (For those who may be using git to develop
patches, I highly recommend "git diff --check" to catch these types of
issues before submitting.)
I will mark this "Ready for Committer".
...Robert
*** a/contrib/dict_xsyn/dict_xsyn.c
--- b/contrib/dict_xsyn/dict_xsyn.c
***************
*** 26,31 **** typedef struct
--- 26,32 ----
char *key; /* Word */
char *value; /* Unparsed list of synonyms, including the
* word itself */
+ int pos; /* Position of key word in original string */
} Syn;
typedef struct
***************
*** 33,39 **** typedef struct
--- 34,44 ----
int len;
Syn *syn;
+ bool matchorig;
bool keeporig;
+ bool matchsynonyms;
+ bool keepsynonyms;
+
} DictSyn;
***************
*** 88,93 **** read_dictionary(DictSyn *d, char *filename)
--- 93,99 ----
{
char *value;
char *key;
+ char *pos;
char *end = NULL;
if (*line == '\0')
***************
*** 96,121 **** read_dictionary(DictSyn *d, char *filename)
value = lowerstr(line);
pfree(line);
! key = find_word(value, &end);
! if (!key)
! {
! pfree(value);
! continue;
! }
! if (cur == d->len)
{
! d->len = (d->len > 0) ? 2 * d->len : 16;
! if (d->syn)
! d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
! else
! d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
! }
! d->syn[cur].key = pnstrdup(key, end - key);
! d->syn[cur].value = value;
! cur++;
}
tsearch_readline_end(&trst);
--- 102,140 ----
value = lowerstr(line);
pfree(line);
! pos = value;
! while((key = find_word(pos, &end)) != NULL)
{
! if (cur == d->len)
! {
! d->len = (d->len > 0) ? 2 * d->len : 16;
! if (d->syn)
! d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
! else
! d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
! }
!
! /* Read first word only if we will match it */
! if (pos != value || d->matchorig)
! {
! d->syn[cur].key = pnstrdup(key, end - key);
! d->syn[cur].value = pstrdup(value);
! d->syn[cur].pos = key - value;
!
! cur++;
! }
!
! pos = end;
! /* Don't read synonyms if we do not match them */
! if (!d->matchsynonyms)
! {
! break;
! }
! }
! pfree(value);
}
tsearch_readline_end(&trst);
***************
*** 133,155 **** dxsyn_init(PG_FUNCTION_ARGS)
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSyn *d;
ListCell *l;
d = (DictSyn *) palloc0(sizeof(DictSyn));
d->len = 0;
d->syn = NULL;
d->keeporig = true;
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
! if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
{
d->keeporig = defGetBoolean(defel);
}
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
{
! read_dictionary(d, defGetString(defel));
}
else
{
--- 152,191 ----
List *dictoptions = (List *) PG_GETARG_POINTER(0);
DictSyn *d;
ListCell *l;
+ char *filename = NULL;
d = (DictSyn *) palloc0(sizeof(DictSyn));
d->len = 0;
d->syn = NULL;
+ d->matchorig = true;
d->keeporig = true;
+ d->matchsynonyms = false;
+ d->keepsynonyms = true;
foreach(l, dictoptions)
{
DefElem *defel = (DefElem *) lfirst(l);
! if (pg_strcasecmp(defel->defname, "MATCHORIG") == 0)
! {
! d->matchorig = defGetBoolean(defel);
! }
! else if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
{
d->keeporig = defGetBoolean(defel);
}
+ else if (pg_strcasecmp(defel->defname, "MATCHSYNONYMS") == 0)
+ {
+ d->matchsynonyms = defGetBoolean(defel);
+ }
+ else if (pg_strcasecmp(defel->defname, "KEEPSYNONYMS") == 0)
+ {
+ d->keepsynonyms = defGetBoolean(defel);
+ }
else if (pg_strcasecmp(defel->defname, "RULES") == 0)
{
! /* we can't read the rules before parsing all options! */
! filename = pstrdup(defGetString(defel));
}
else
{
***************
*** 160,165 **** dxsyn_init(PG_FUNCTION_ARGS)
--- 196,207 ----
}
}
+ if(filename)
+ {
+ read_dictionary(d, filename);
+ pfree(filename);
+ }
+
PG_RETURN_POINTER(d);
}
***************
*** 198,204 **** dxsyn_lexize(PG_FUNCTION_ARGS)
int value_length = strlen(value);
char *pos = value;
int nsyns = 0;
- bool is_first = true;
res = palloc(0);
--- 240,245 ----
***************
*** 214,221 **** dxsyn_lexize(PG_FUNCTION_ARGS)
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
res[nsyns].lexeme = NULL;
! /* first word is added to result only if KEEPORIG flag is set */
! if (d->keeporig || !is_first)
{
res[nsyns].lexeme = pstrdup(syn);
res[nsyns + 1].lexeme = NULL;
--- 255,262 ----
res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
res[nsyns].lexeme = NULL;
! /* The first word is added only if keeporig=true */
! if (pos != value || d->keeporig)
{
res[nsyns].lexeme = pstrdup(syn);
res[nsyns + 1].lexeme = NULL;
***************
*** 223,231 **** dxsyn_lexize(PG_FUNCTION_ARGS)
nsyns++;
}
- is_first = false;
-
pos = end + 1;
}
pfree(value);
--- 264,275 ----
nsyns++;
}
pos = end + 1;
+
+ if(!d->keepsynonyms)
+ {
+ break;
+ }
}
pfree(value);
*** a/contrib/dict_xsyn/expected/dict_xsyn.out
--- b/contrib/dict_xsyn/expected/dict_xsyn.out
***************
*** 5,14 ****
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
! --configuration
! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
ts_lexize
----------------
{sn,sne,1987a}
--- 5,80 ----
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
! -- default configuration - match first word and return it among with all synonyms
! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ --------------------------
+ {supernova,sn,sne,1987a}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ -- the same, but return only synonyms
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ ----------------
+ {sn,sne,1987a}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ -- match any word and return all words
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ --------------------------
+ {supernova,sn,sne,1987a}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ --------------------------
+ {supernova,sn,sne,1987a}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ -- match any word and return all words except first one
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ ----------------
+ {sn,sne,1987a}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
ts_lexize
----------------
{sn,sne,1987a}
***************
*** 20,22 **** SELECT ts_lexize('xsyn', 'grb');
--- 86,148 ----
(1 row)
+ -- match any synonym but not first word, and return first word instead
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ -------------
+ {supernova}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ -- do not match or return anything
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
+ -- match any word but return nothing
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ ts_lexize
+ -----------
+ {}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'sn');
+ ts_lexize
+ -----------
+ {}
+ (1 row)
+
+ SELECT ts_lexize('xsyn', 'grb');
+ ts_lexize
+ -----------
+
+ (1 row)
+
*** a/contrib/dict_xsyn/sql/dict_xsyn.sql
--- b/contrib/dict_xsyn/sql/dict_xsyn.sql
***************
*** 8,16 **** SET client_min_messages = warning;
\set ECHO all
RESET client_min_messages;
! --configuration
! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
SELECT ts_lexize('xsyn', 'grb');
--- 8,54 ----
\set ECHO all
RESET client_min_messages;
! -- default configuration - match first word and return it among with all synonyms
! ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
--lexize
SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
SELECT ts_lexize('xsyn', 'grb');
+
+ -- the same, but return only synonyms
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=false);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
+ -- match any word and return all words
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
+ -- match any word and return all words except first one
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=true, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
+ -- match any synonym but not first word, and return first word instead
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
+ -- do not match or return anything
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=false, KEEPSYNONYMS=false, MATCHSYNONYMS=false);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
+ -- match any word but return nothing
+ ALTER TEXT SEARCH DICTIONARY xsyn (RULES='xsyn_sample', KEEPORIG=false, MATCHORIG=true, KEEPSYNONYMS=false, MATCHSYNONYMS=true);
+ SELECT ts_lexize('xsyn', 'supernova');
+ SELECT ts_lexize('xsyn', 'sn');
+ SELECT ts_lexize('xsyn', 'grb');
+
*** a/doc/src/sgml/dict-xsyn.sgml
--- b/doc/src/sgml/dict-xsyn.sgml
***************
*** 23,35 ****
<itemizedlist>
<listitem>
<para>
<literal>keeporig</> controls whether the original word is included (if
! <literal>true</>), or only its synonyms (if <literal>false</>). Default
! is <literal>true</>.
</para>
</listitem>
<listitem>
<para>
<literal>rules</> is the base name of the file containing the list of
synonyms. This file must be stored in
<filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means
--- 23,54 ----
<itemizedlist>
<listitem>
<para>
+ <literal>matchorig</> controls whether the original word is accepted by
+ the dictionary. Default is <literal>true</>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
<literal>keeporig</> controls whether the original word is included (if
! <literal>true</>) in results, or only its synonyms (if
! <literal>false</>). Default is <literal>true</>.
! </para>
! </listitem>
! <listitem>
! <para>
! <literal>matchsynonyms</> controls whether any of the synonyms is accepted
! by the dictionary (if <literal>true</>). Default is <literal>false</>.
</para>
</listitem>
<listitem>
<para>
+ <literal>keepsynonyms</> controls whether synonyms are returned by the
+ dictionary (if <literal>true</>). Default is <literal>true</>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
<literal>rules</> is the base name of the file containing the list of
synonyms. This file must be stored in
<filename>$SHAREDIR/tsearch_data/</> (where <literal>$SHAREDIR</> means
***************
*** 90,96 **** ALTER TEXT SEARCH DICTIONARY
--- 109,139 ----
mydb=# SELECT ts_lexize('xsyn', 'word');
ts_lexize
-----------------------
+ {syn1,syn2,syn3}
+
+ mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true);
+ ALTER TEXT SEARCH DICTIONARY
+
+ mydb=# SELECT ts_lexize('xsyn', 'word');
+ ts_lexize
+ -----------------------
{word,syn1,syn2,syn3}
+
+ mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=false, MATCHSYNONYMS=true);
+ ALTER TEXT SEARCH DICTIONARY
+
+ mydb=# SELECT ts_lexize('xsyn', 'syn1');
+ ts_lexize
+ -----------------------
+ {syn1,syn2,syn3}
+
+ mydb=# ALTER TEXT SEARCH DICTIONARY xsyn (RULES='my_rules', KEEPORIG=true, MATCHORIG=false, KEEPSYNONYMS=false);
+ ALTER TEXT SEARCH DICTIONARY
+
+ mydb=# SELECT ts_lexize('xsyn', 'syn1');
+ ts_lexize
+ -----------------------
+ {word}
</programlisting>
but real-world usage will involve including it in a text search
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers