[HACKERS] [PATCH] few fts functions for jsonb

2017-02-28 Thread Dmitry Dolgov
Hi all

I would like to propose patch with a set of new small functions for fts in
case of
jsonb data type:

* to_tsvector(config, jsonb) - make a tsvector from all string values and
  elements of jsonb object. To prevent the situation, when tsquery can find
a
  phrase consisting of lexemes from two different values/elements, this
  function will add an increment to position of each lexeme from every new
  value/element.

* ts_headline(config, jsonb, tsquery, options) - generate a headline
directly
  from jsonb object

Here are the examples how they work:

```
=# select to_tsvector('{"a": "aaa bbb", "b": ["ccc ddd"], "c": {"d": "eee
fff"}}'::jsonb);
   to_tsvector
-
 'aaa':1 'bbb':2 'ccc':4 'ddd':5 'eee':7 'fff':8
(1 row)


=# select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc
ddd"}}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
 ts_headline
--
 aaa  ccc 
(1 row)
```

Any comments or suggestions?
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..08e08e5 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,8 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
+#include "utils/fmgrprotos.h"
 
 
 typedef struct MorphOpaque
@@ -256,6 +258,58 @@ to_tsvector(PG_FUNCTION_ARGS)
 		PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb*jb = PG_GETARG_JSONB(0);
+	JsonbIterator		*it;
+	JsonbValue			v;
+	Oid	cfgId;
+	ParsedText			prs;
+	TSVector			result, item_vector;
+	JsonbIteratorToken	type;
+	int	i;
+
+	cfgId = getTSCurrentConfig(true);
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			prs.lenwords = v.val.string.len / 6;
+
+			if (prs.lenwords == 0)
+prs.lenwords = 2;
+
+			prs.curwords = 0;
+			prs.pos = 0;
+			prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+			parsetext(cfgId, &prs, v.val.string.val, v.val.string.len);
+
+			if (prs.curwords)
+			{
+if (result->size != 0)
+{
+	for (i = 0; i < prs.curwords; i++)
+		prs.words[i].pos.pos = prs.words[i].pos.pos + TS_JUMP;
+
+	item_vector = make_tsvector(&prs);
+
+	result = DirectFunctionCall2(tsvector_concat,
+			TSVectorGetDatum(result),
+			PointerGetDatum(item_vector));
+}
+else
+	result = make_tsvector(&prs);
+			}
+		}
+	}
+
+	PG_RETURN_DATUM(result);
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..035632e 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/varlena.h"
+#include "utils/jsonb.h"
 
 
 /**sql-level interface**/
@@ -362,3 +363,41 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 		PG_GETARG_DATUM(1),
 		PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+  ObjectIdGetDatum(getTSCurrentConfig(true)),
+		CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(0))),
+		PG_GETARG_DATUM(1)));
+}
+
+Datum
+ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_byid_opt,
+		PG_GETARG_DATUM(0),
+		CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(1))),
+		PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
+  ObjectIdGetDatum(getTSCurrentConfig(true)),
+		CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(0))),
+		PG_GETARG_DATUM(1),
+		PG_GETARG_DATUM(2)));
+}
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_byid_opt,
+		PG_GETARG_DATUM(0),
+		CStringGetTextDatum(jsonb_values_as_string(PG_GETARG_DATUM(1))),
+		PG_GETARG_DATUM(2),
+		PG_GETARG_DATUM(3)));
+}
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..d504b87 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -4130,3 +4130,29 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Gather all string values and elements from jsonb into one string buffer.
+ * It's convenient for using inside ts_headline_* functions.
+ */
+char*
+jsonb_values_as_string(Jsonb *jb)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+	StringInfo			buffer = makeStringInfo();
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, fa

Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-20 Thread Andrew Dunstan


On 03/10/2017 11:13 AM, Dmitry Dolgov wrote:
> > On 28 February 2017 at 19:21, Oleg Bartunov  > wrote:
> > 1. add json support
>
> I've added json support for all functions.
>
> >  Its_headline  should returns the original json with highlighting
>
> Yes, I see now. I don't think it's worth it to add a special option
> for that
> purpose, so I've just changed the implementation to return the
> original json(b).
>


This is a pretty good idea.

However, I think it should probably be broken up into a couple of pieces
- one for the generic json/jsonb transforms infrastructure (which
probably needs some more comments) and one for the FTS functions that
will use it.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-21 Thread Dmitry Dolgov
> On 21 March 2017 at 03:03, Andrew Dunstan 
wrote:
>
> However, I think it should probably be broken up into a couple of pieces -
> one for the generic json/jsonb transforms infrastructure (which probably
> needs some more comments) and one for the FTS functions that will use it.

Sure, here are two patches with separated functionality and a bit more
commentaries for the transform functions.
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..bac08c0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -42,6 +42,8 @@
 #define JB_PATH_CREATE_OR_INSERT \
 	(JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
 
+#define is_jsonb_data(type) (type == WJB_KEY || type == WJB_VALUE || type == WJB_ELEM)
+
 /* state for json_object_keys */
 typedef struct OkeysState
 {
@@ -52,6 +54,23 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json function */
+typedef struct IterateJsonState
+{
+	JsonLexContext		*lex;
+	JsonIterateAction	action;			/* an action that will be applied to each json value */
+	void*action_state;	/* any necessary context for iteration */
+} IterateJsonState;
+
+/* state for transform_json function */
+typedef struct TransformJsonState
+{
+	JsonLexContext		*lex;
+	StringInfo			strval;			/* resulting json */
+	JsonTransformAction	action;			/* an action that will be applied to each json value */
+	void*action_state;	/* any necessary context for transformation */
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void apply_action(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_object_start(void *state);
+static void transform_object_end(void *state);
+static void transform_array_start(void *state);
+static void transform_array_end(void *state);
+static void transform_object_field_start(void *state, char *fname, bool isnull);
+static void transform_array_element_start(void *state, bool isnull);
+static void transform_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,206 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them together with
+ * an iteration state to a specified JsonIterateAction.
+ */
+void *
+iterate_jsonb_values(Jsonb *jb, void *state, JsonIterateAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+
+	return state;
+}
+
+/*
+ * Iterate over json string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateAction.
+ */
+void *
+iterate_json_values(text *json, void *action_state, JsonIterateAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = apply_action;
+
+	pg_parse_json(lex, sem);
+
+	return state;
+}
+
+/*
+ * An auxiliary function for iterate_json_values to invoke a specified
+ * JsonIterateAction.
+ */
+static void
+apply_action(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformAction to every
+ * string value or element. Any necessary context for a JsonTransformAction can
+ * be passed in the action_state variable. Function returns a copy of an original jsonb
+ * object with transformed values.
+ */
+Jsonb *
+transform_jsonb(Jsonb *jsonb, void *action_state, JsonTransformAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text*out;
+	boolis_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(ou

Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-23 Thread Andrew Dunstan


On 03/21/2017 06:28 PM, Dmitry Dolgov wrote:
> > On 21 March 2017 at 03:03, Andrew Dunstan
>  > wrote:
> >
> > However, I think it should probably be broken up into a couple of
> pieces -
> > one for the generic json/jsonb transforms infrastructure (which probably
> > needs some more comments) and one for the FTS functions that will
> use it.
>
> Sure, here are two patches with separated functionality and a bit more
> commentaries for the transform functions.

I'm not through looking at this. However, here are a few preliminary
comments

  * we might need to rationalize the header locations a bit
  * iterate_json(b) and transform_json(b) are a bit too generally named.
Really what they do is iterate over or transform string values in
the json(b). They ignore / preserve the structure, keys, and
non-string scalar values in the json(b). A general iterate or
transform function would be called in effect with a stream of all
the elements in the json, not just scalar strings.
  * Unless I'm missing something the iterate_json(b)_values return value
is ignored. Instead of returning the state it looks to me like it
should return nothing and be declared as void instead of void *
  * transform_jsonb and transform_json are somewhat asymmetrical. The
latter should probably return a text* instead of a StringInfo, to be
consistent with the former.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-26 Thread Dmitry Dolgov
> I'm not through looking at this. However, here are a few preliminary
comments

I've attached new versions of the patches with improvements related to
these commentaries.
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..8f7bcfe 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 		PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_values(jb, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_values(json, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+	TSVectorGetDatum(state->result),
+	PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..ab1716a 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQueryquery;
+	List*prsoptions;
+	booltransformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,177 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 		PG_GETARG_DATUM(1),
 		PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
+
+	state->prs = &prs;
+	state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
+	state->prsobj = lookup_ts_parser_cache(state->

Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-29 Thread Andrew Dunstan
On 26 March 2017 at 17:57, Dmitry Dolgov <9erthali...@gmail.com> wrote:
>> I'm not through looking at this. However, here are a few preliminary
>> comments
>
> I've attached new versions of the patches with improvements related to these
> commentaries.

These patches seem fundamentally OK. But I'm still not happy with the
naming etc.

I think the header changes would probably be better placed in
jsonapi.h or in a new header file.

And the names still seem too general to me. e.g. transform_json_values
should probably be transform_json_string_values, and the static
support functions should be renamed to match. Also the
JsonIterateAction and JsonTransformAction funtion typedefs should
probably be renamed to match.

I'm not sure there is any great point in the is_jsonb_data macro,
which is only used in one spot. I would get rid of it and expand the
test in place.

I don't have much time this week to work on it, as there are one or
two other patches I also want to look at.  If you clean these things
up I will commit it. The second patch looks fine.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-29 Thread Dmitry Dolgov
> On 29 March 2017 at 18:28, Andrew Dunstan 
wrote:
>
> These patches seem fundamentally OK. But I'm still not happy with the
> naming etc.

I've changed names for all functions and action definitions, moved out the
changes in header file to `jsonapi.h` and removed `is_jsonb_data` macro. So
it
should be better now.
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
index 6a7aab2..c9f86b0 100644
--- a/src/backend/utils/adt/jsonfuncs.c
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -52,6 +52,25 @@ typedef struct OkeysState
 	int			sent_count;
 } OkeysState;
 
+/* state for iterate_json function */
+typedef struct IterateJsonState
+{
+	JsonLexContext	*lex;
+	JsonIterateStringValuesAction	action;			/* an action that will be applied
+	   to each json value */
+	void			*action_state;	/* any necessary context for iteration */
+} IterateJsonState;
+
+/* state for transform_json function */
+typedef struct TransformJsonState
+{
+	JsonLexContext	*lex;
+	StringInfo		strval;			/* resulting json */
+	JsonTransformStringValuesAction	action;			/* an action that will be applied
+	   to each json value */
+	void			*action_state;	/* any necessary context for transformation */
+} TransformJsonState;
+
 /* state for json_get* functions */
 typedef struct GetState
 {
@@ -271,6 +290,18 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
 			 int level, Jsonb *newval, uint32 nelems, int op_type);
 static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
 
+/* function supporting iterate_json(b) */
+static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* function supporting transform_json(b) */
+static void transform_string_values_object_start(void *state);
+static void transform_string_values_object_end(void *state);
+static void transform_string_values_array_start(void *state);
+static void transform_string_values_array_end(void *state);
+static void transform_string_values_object_field_start(void *state, char *fname, bool isnull);
+static void transform_string_values_array_element_start(void *state, bool isnull);
+static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
 
 /*
  * SQL function json_object_keys
@@ -4130,3 +4161,208 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
 		}
 	}
 }
+
+/*
+ * Iterate over jsonb string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v;
+	JsonbIteratorToken	type;
+
+	it = JsonbIteratorInit(&jb->root);
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			action(state, v.val.string.val, v.val.string.len);
+		}
+	}
+}
+
+/*
+ * Iterate over json string values or elements, and pass them together with an
+ * iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
+{
+	JsonLexContext *lex = makeJsonLexContext(json, true);
+	JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+	IterateJsonState   *state = palloc0(sizeof(IterateJsonState));
+
+	state->lex = lex;
+	state->action = action;
+	state->action_state = action_state;
+
+	sem->semstate = (void *) state;
+	sem->scalar = iterate_string_values_scalar;
+
+	pg_parse_json(lex, sem);
+}
+
+/*
+ * An auxiliary function for iterate_json_string_values to invoke a specified
+ * JsonIterateStringValuesAction.
+ */
+static void
+iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+	IterateJsonState   *_state = (IterateJsonState *) state;
+	if (tokentype == JSON_TOKEN_STRING)
+		(*_state->action) (_state->action_state, token, strlen(token));
+}
+
+/*
+ * Iterate over a jsonb, and apply a specified JsonTransformStringValuesAction
+ * to every string value or element. Any necessary context for a
+ * JsonTransformStringValuesAction can be passed in the action_state variable.
+ * Function returns a copy of an original jsonb object with transformed values.
+ */
+Jsonb *
+transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
+			  JsonTransformStringValuesAction transform_action)
+{
+	JsonbIterator		*it;
+	JsonbValue			v, *res = NULL;
+	JsonbIteratorToken	type;
+	JsonbParseState		*st = NULL;
+	text*out;
+	boolis_scalar = false;
+
+	it = JsonbIteratorInit(&jsonb->root);
+	is_scalar = it->isScalar;
+
+	while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+		{
+			out = transform_action(action_state, v.val.string.val, v.val.string.len);
+			v.val.string.val = VARDATA_ANY(out);
+			v.val.string.len = VARSIZE_

Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-30 Thread Andrew Dunstan
On 29 March 2017 at 16:19, Dmitry Dolgov <9erthali...@gmail.com> wrote:
>> On 29 March 2017 at 18:28, Andrew Dunstan 
>> wrote:
>>
>> These patches seem fundamentally OK. But I'm still not happy with the
>> naming etc.
>
> I've changed names for all functions and action definitions, moved out the
> changes in header file to `jsonapi.h` and removed `is_jsonb_data` macro. So
> it
> should be better now.

I have just noticed as I was writing/testing the non-existent docs for
this patch that it doesn't supply variants of to_tsvector that take a
regconfig as the first argument. Is there a reason for that? Why
should the json(b) versions be different from the text versions?

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-30 Thread Dmitry Dolgov
On 31 March 2017 at 00:01, Andrew Dunstan 
wrote:
>
> I have just noticed as I was writing/testing the non-existent docs for
> this patch that it doesn't supply variants of to_tsvector that take a
> regconfig as the first argument. Is there a reason for that? Why
> should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version of the
patch (only the functions part)
to add those variants.
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..f19383e 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
 		PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid	cfgId = PG_GETARG_OID(0);
+	Jsonb*jb = PG_GETARG_JSONB(1);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = cfgId;
+	state.prs = prs;
+
+	iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb	*jb = PG_GETARG_JSONB(0);
+	Oid		cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
+		ObjectIdGetDatum(cfgId),
+		JsonbGetDatum(jb)));
+}
+
+Datum
+json_to_tsvector_byid(PG_FUNCTION_ARGS)
+{
+	Oid	cfgId = PG_GETARG_OID(0);
+	text*json = PG_GETARG_TEXT_P(1);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = cfgId;
+	state.prs = prs;
+
+	iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text	*json = PG_GETARG_TEXT_P(0);
+	Oid		cfgId;
+
+	cfgId = getTSCurrentConfig(true);
+	PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
+		ObjectIdGetDatum(cfgId),
+		PointerGetDatum(json)));
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+	TSVectorGetDatum(state->result),
+	PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..6e4e445 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -20,6 +20,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonapi.h"
 #include "utils/varlena.h"
 
 
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQueryquery;
+	List*prsoptions;
+	booltransform

Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-31 Thread Oleg Bartunov
On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthali...@gmail.com> wrote:

On 31 March 2017 at 00:01, Andrew Dunstan 
wrote:
>
> I have just noticed as I was writing/testing the non-existent docs for
> this patch that it doesn't supply variants of to_tsvector that take a
> regconfig as the first argument. Is there a reason for that? Why
> should the json(b) versions be different from the text versions?

No, there is no reason, I just missed that. Here is a new version of the
patch (only the functions part)
to add those variants.


Congratulations with patch committed, who will write an addition
documentation? I think we need to touch  FTS and JSON parts.


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-01 Thread Andrew Dunstan


On 03/31/2017 03:17 PM, Oleg Bartunov wrote:
>
>
> On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthali...@gmail.com
> > wrote:
>
> On 31 March 2017 at 00:01, Andrew Dunstan
>  > wrote:
> >
> > I have just noticed as I was writing/testing the non-existent
> docs for
> > this patch that it doesn't supply variants of to_tsvector that
> take a
> > regconfig as the first argument. Is there a reason for that? Why
> > should the json(b) versions be different from the text versions?
>
> No, there is no reason, I just missed that. Here is a new version
> of the patch (only the functions part)
> to add those variants.
>
>
> Congratulations with patch committed, who will write an addition
> documentation? I think we need to touch  FTS and JSON parts.


I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Andres Freund
On 2017-04-01 16:20:46 -0400, Andrew Dunstan wrote:
> 
> 
> On 03/31/2017 03:17 PM, Oleg Bartunov wrote:
> >
> >
> > On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthali...@gmail.com
> > > wrote:
> >
> > On 31 March 2017 at 00:01, Andrew Dunstan
> >  > > wrote:
> > >
> > > I have just noticed as I was writing/testing the non-existent
> > docs for
> > > this patch that it doesn't supply variants of to_tsvector that
> > take a
> > > regconfig as the first argument. Is there a reason for that? Why
> > > should the json(b) versions be different from the text versions?
> >
> > No, there is no reason, I just missed that. Here is a new version
> > of the patch (only the functions part)
> > to add those variants.
> >
> >
> > Congratulations with patch committed, who will write an addition
> > documentation? I think we need to touch  FTS and JSON parts.

> I added documentation when I committed it for the new functions, in the
> FTS section. I'm not sure what we need to add to the JSON section if
> anything.

I see that the CF entry for this hasn't been marked as committed:
https://commitfest.postgresql.org/13/1054/
Is there anything left here?

- Andres


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Sven R. Kunze

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.


Not sure, if this is related but the formatting of 
https://www.postgresql.org/docs/devel/static/functions-textsearch.html 
looks a bit strange.


Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

2) Regarding the additional line:
to_tsvector([ config regconfig , ] document json(b))tsvector reduce 
document text to tsvectorto_tsvector('english', '{"a": "The Fat 
Rats"}'::json)'fat':2 'rat':3


Maybe change "reduce document text to tsvector" to "extracting JSON 
values  and reduce to tsvector"?



Sven


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Andrew Dunstan


On 04/03/2017 02:22 PM, Andres Freund wrote:
> On 2017-04-01 16:20:46 -0400, Andrew Dunstan wrote:
>>
>> On 03/31/2017 03:17 PM, Oleg Bartunov wrote:
>>>
>>> On 30 Mar 2017 23:43, "Dmitry Dolgov" <9erthali...@gmail.com
>>> > wrote:
>>>
>>> On 31 March 2017 at 00:01, Andrew Dunstan
>>> >> > wrote:
>>> >
>>> > I have just noticed as I was writing/testing the non-existent
>>> docs for
>>> > this patch that it doesn't supply variants of to_tsvector that
>>> take a
>>> > regconfig as the first argument. Is there a reason for that? Why
>>> > should the json(b) versions be different from the text versions?
>>>
>>> No, there is no reason, I just missed that. Here is a new version
>>> of the patch (only the functions part)
>>> to add those variants.
>>>
>>>
>>> Congratulations with patch committed, who will write an addition
>>> documentation? I think we need to touch  FTS and JSON parts.
>> I added documentation when I committed it for the new functions, in the
>> FTS section. I'm not sure what we need to add to the JSON section if
>> anything.
> I see that the CF entry for this hasn't been marked as committed:
> https://commitfest.postgresql.org/13/1054/
> Is there anything left here?
>


Says "Status committed" for me. I fixed this in Sunday after Tom prodded me.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Andrew Dunstan


On 04/03/2017 02:44 PM, Sven R. Kunze wrote:
> On 01.04.2017 22:20, Andrew Dunstan wrote:
>> I added documentation when I committed it for the new functions, in the
>> FTS section. I'm not sure what we need to add to the JSON section if
>> anything.
>
> Not sure, if this is related but the formatting of
> https://www.postgresql.org/docs/devel/static/functions-textsearch.html
> looks a bit strange.
>
> Just 2 questions/notes:
> 1) in what order are the values of the JSON extracted?

In the order they exist in the underlying document.

>
> 2) Regarding the additional line:
> to_tsvector([ config regconfig , ] document json(b))tsvector
> reduce document text to tsvectorto_tsvector('english', '{"a": "The
> Fat Rats"}'::json)'fat':2 'rat':3
>
> Maybe change "reduce document text to tsvector" to "extracting JSON
> values  and reduce to tsvector"?
>
>


OK, I will do something along those lines.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Sven R. Kunze

On 03.04.2017 21:30, Andrew Dunstan wrote:

On 04/03/2017 02:44 PM, Sven R. Kunze wrote:

On 01.04.2017 22:20, Andrew Dunstan wrote:

I added documentation when I committed it for the new functions, in the
FTS section. I'm not sure what we need to add to the JSON section if
anything.

Not sure, if this is related but the formatting of
https://www.postgresql.org/docs/devel/static/functions-textsearch.html
looks a bit strange.

Just 2 questions/notes:
1) in what order are the values of the JSON extracted?

In the order they exist in the underlying document.


Just asking as the order can have implications for fulltext searches. 
So, might be valuable for the docs.



Are these documents equally ordered in this sense?

srkunze=# select '{"a": "abc", "b": "def"}'::jsonb;
  jsonb
--
 {"a": "abc", "b": "def"}
(1 row)

srkunze=# select '{"b": "def", "a": "abc"}'::jsonb;
  jsonb
--
 {"a": "abc", "b": "def"}
(1 row)


Also what about non-ascii keys? Are they ordered by the default locale 
of the PostgreSQL cluster (say de_DE.utf-8)?



2) Regarding the additional line:
to_tsvector([ config regconfig , ] document json(b))tsvector
reduce document text to tsvectorto_tsvector('english', '{"a": "The
Fat Rats"}'::json)'fat':2 'rat':3

Maybe change "reduce document text to tsvector" to "extracting JSON
values  and reduce to tsvector"?




OK, I will do something along those lines.

cheers

andrew





--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-04-03 Thread Andrew Dunstan


On 04/03/2017 03:41 PM, Sven R. Kunze wrote:
> On 03.04.2017 21:30, Andrew Dunstan wrote:
>> On 04/03/2017 02:44 PM, Sven R. Kunze wrote:
>>> On 01.04.2017 22:20, Andrew Dunstan wrote:
 I added documentation when I committed it for the new functions, in
 the
 FTS section. I'm not sure what we need to add to the JSON section if
 anything.
>>> Not sure, if this is related but the formatting of
>>> https://www.postgresql.org/docs/devel/static/functions-textsearch.html
>>> looks a bit strange.
>>>
>>> Just 2 questions/notes:
>>> 1) in what order are the values of the JSON extracted?
>> In the order they exist in the underlying document.
>
> Just asking as the order can have implications for fulltext searches.
> So, might be valuable for the docs.
>
>
> Are these documents equally ordered in this sense?
>
> srkunze=# select '{"a": "abc", "b": "def"}'::jsonb;
>   jsonb
> --
>  {"a": "abc", "b": "def"}
> (1 row)
>
> srkunze=# select '{"b": "def", "a": "abc"}'::jsonb;
>   jsonb
> --
>  {"a": "abc", "b": "def"}
> (1 row)
>


Yes, when converted to jsonb these two documents are identical.


>
> Also what about non-ascii keys? Are they ordered by the default locale
> of the PostgreSQL cluster (say de_DE.utf-8)?


Yes, I believe so.

cheers

andrew

-- 
Andrew Dunstanhttps://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-02-28 Thread Oleg Bartunov
The proposed patch looks not very important, but I consider it as an
important feature, which Oracle and Microsoft already have, that's why I
asked Dmitry to work on this and made it before feature freeze. My comments
follows below the post.

On Tue, Feb 28, 2017 at 1:59 PM, Dmitry Dolgov <9erthali...@gmail.com>
wrote:

> Hi all
>
> I would like to propose patch with a set of new small functions for fts in
> case of
> jsonb data type:
>
> * to_tsvector(config, jsonb) - make a tsvector from all string values and
>   elements of jsonb object. To prevent the situation, when tsquery can
> find a
>   phrase consisting of lexemes from two different values/elements, this
>   function will add an increment to position of each lexeme from every new
>   value/element.
>
> * ts_headline(config, jsonb, tsquery, options) - generate a headline
> directly
>   from jsonb object
>
> Here are the examples how they work:
>
> ```
> =# select to_tsvector('{"a": "aaa bbb", "b": ["ccc ddd"], "c": {"d": "eee
> fff"}}'::jsonb);
>to_tsvector
> -
>  'aaa':1 'bbb':2 'ccc':4 'ddd':5 'eee':7 'fff':8
> (1 row)
>
>
> =# select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc
> ddd"}}'::jsonb, tsquery('bbb & ddd & hhh'), 'StartSel = <, StopSel = >');
>  ts_headline
> --
>  aaa  ccc 
> (1 row)
> ```
>

> Any comments or suggestions?
>

1. add json support
2. Its_headline  should returns the original json with highlighting.  As a
first try the proposed ts_headline  could be ok, probably need special
option.



> --
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>
>


Re: [HACKERS] [PATCH] few fts functions for jsonb

2017-03-10 Thread Dmitry Dolgov
> On 28 February 2017 at 19:21, Oleg Bartunov  wrote:
> 1. add json support

I've added json support for all functions.

>  Its_headline  should returns the original json with highlighting

Yes, I see now. I don't think it's worth it to add a special option for that
purpose, so I've just changed the implementation to return the original
json(b).
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6e5de8f..8f7bcfe 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -16,6 +16,7 @@
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
+#include "utils/jsonb.h"
 
 
 typedef struct MorphOpaque
@@ -24,6 +25,14 @@ typedef struct MorphOpaque
 	int			qoperator;		/* query operator */
 } MorphOpaque;
 
+typedef struct TSVectorBuildState
+{
+	ParsedText	*prs;
+	TSVector	result;
+	Oid			cfgId;
+} TSVectorBuildState;
+
+static void add_to_tsvector(void *state, char *elem_value, int elem_len);
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -256,6 +265,109 @@ to_tsvector(PG_FUNCTION_ARGS)
 		PointerGetDatum(in)));
 }
 
+Datum
+jsonb_to_tsvector(PG_FUNCTION_ARGS)
+{
+	Jsonb*jb = PG_GETARG_JSONB(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_jsonb_values(jb, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(jb, 1);
+
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in jsonb,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+Datum
+json_to_tsvector(PG_FUNCTION_ARGS)
+{
+	text*json = PG_GETARG_TEXT_P(0);
+	TSVectorBuildState	state;
+	ParsedText			*prs = (ParsedText *) palloc(sizeof(ParsedText));
+
+	prs->words = NULL;
+	state.result = NULL;
+	state.cfgId = getTSCurrentConfig(true);
+	state.prs = prs;
+
+	iterate_json_values(json, &state, (JsonIterateAction) add_to_tsvector);
+
+	PG_FREE_IF_COPY(json, 1);
+	if (state.result == NULL)
+	{
+		/* There weren't any string elements in json,
+		 * so wee need to return an empty vector */
+
+		if (prs->words != NULL)
+			pfree(prs->words);
+
+		state.result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
+		state.result->size = 0;
+	}
+
+	PG_RETURN_TSVECTOR(state.result);
+}
+
+/*
+ * Extend current TSVector from _state with a new one,
+ * build over a json(b) element.
+ */
+static void
+add_to_tsvector(void *_state, char *elem_value, int elem_len)
+{
+	TSVectorBuildState *state = (TSVectorBuildState *) _state;
+	ParsedText	*prs = state->prs;
+	TSVector	item_vector;
+	int			i;
+
+	prs->lenwords = elem_len / 6;
+	if (prs->lenwords == 0)
+		prs->lenwords = 2;
+
+	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+	prs->curwords = 0;
+	prs->pos = 0;
+
+	parsetext(state->cfgId, prs, elem_value, elem_len);
+
+	if (prs->curwords)
+	{
+		if (state->result != NULL)
+		{
+			for (i = 0; i < prs->curwords; i++)
+prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
+
+			item_vector = make_tsvector(prs);
+
+			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
+	TSVectorGetDatum(state->result),
+	PointerGetDatum(item_vector));
+		}
+		else
+			state->result = make_tsvector(prs);
+	}
+}
+
 /*
  * to_tsquery
  */
diff --git a/src/backend/tsearch/wparser.c b/src/backend/tsearch/wparser.c
index 8ca1c62..b648996 100644
--- a/src/backend/tsearch/wparser.c
+++ b/src/backend/tsearch/wparser.c
@@ -21,6 +21,7 @@
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 #include "utils/varlena.h"
+#include "utils/jsonb.h"
 
 
 /**sql-level interface**/
@@ -31,6 +32,19 @@ typedef struct
 	LexDescr   *list;
 } TSTokenTypeStorage;
 
+/* state for ts_headline_json_* */
+typedef struct HeadlineJsonState
+{
+	HeadlineParsedText *prs;
+	TSConfigCacheEntry *cfg;
+	TSParserCacheEntry *prsobj;
+	TSQueryquery;
+	List*prsoptions;
+	booltransformed;
+} HeadlineJsonState;
+
+static text * headline_json_value(void *_state, char *elem_value, int elem_len);
+
 static void
 tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
 {
@@ -362,3 +376,177 @@ ts_headline_opt(PG_FUNCTION_ARGS)
 		PG_GETARG_DATUM(1),
 		PG_GETARG_DATUM(2)));
 }
+
+Datum
+ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
+{
+	Jsonb			*out, *jb = PG_GETARG_JSONB(1);
+	TSQuery			query = PG_GETARG_TSQUERY(2);
+	text			*opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
+
+	HeadlineParsedText prs;
+	HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
+
+	memset(&prs, 0, sizeof(HeadlineParsedText));
+	prs.lenwords = 32;
+	prs.words = (HeadlineWordEntry *