Dmitry Dolgov wrote:
On 6 April 2018 at 16:25, Teodor Sigaev <teo...@sigaev.ru> wrote:
1) I don't like jsonb_all_to_tsvector too.. What if we will accept new
variant to index? Let me suggest:

tsvector jsonb_to_tsvector([regclass,] jsonb, text[])

where the text[] arg is actually a set of flags: the array contains any combination of
literals 'numeric', 'string', 'boolean' (and even 'key' to index keys) to
point which types should be indexed. Moreover, maybe it should be a jsonb
type for possible improvements in future. For now, it should be a jsonb array
type with string elements described above, example:

select jsonb_to_tsvector('{"a": "aaa in bbb ddd ccc", "b":123}',
                                 '["numeric", "boolean"]');


The form jsonb_to_tsvector('...', '["string"]') is effectively the same as
the current to_tsvector(jsonb)

Thank you for the suggestion, this sounds appealing. But I have two questions:

* why it should be a jsonb array, not a regular array?
To simplify extension of this array in future, for example to add limitation of recursion level in converted jsonb, etc.



* it would introduce the idea of jsonb element type expressed in text format,
   so "string", "numeric", "boolean" etc - are there any consequences of that?
   As far as I understand so far there was only jsonb_typeof.
Good catch, jsonb_typeof strings are okay: "string", "number", "boolean"
also  "all", "key", "value"

See workable sketch for parsing jsonb flags and new worker variant.



2) Now it fails, and I see something strange in the resulting tsvector

Oh, sorry, stupid copy-paste mistake in the condition. Just for the record,
I've attached a fixed version of the previous patch (without any changes regarding
an array instead of a boolean flag).


by the way:
Datum
jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
    Jsonb      *jb = PG_GETARG_JSONB_P(0);
...
    PG_FREE_IF_COPY(jb, 1); //wrong arg number
}

jsonb_all_to_tsvector and json variants too




--
Teodor Sigaev                                   E-mail: teo...@sigaev.ru
                                                   WWW: http://www.sigaev.ru/
/*
 * Bit flags naming the kinds of jsonb content that can be selected.
 * Individual flags occupy distinct bits so they can be OR-ed together;
 * jtiAll is the union of all of them.
 */
typedef enum JsonToIndex {
	jtiKey		= 1 << 0,
	jtiString	= 1 << 1,
	jtiNumeric	= 1 << 2,
	jtiBool		= 1 << 3,
	jtiAll		= jtiBool | jtiNumeric | jtiString | jtiKey
} JsonToIndex;

/*
 * Convert a jsonb array of flag names into a JsonToIndex bitmask.
 *
 * Every element of the array must be a string.  Names are matched with
 * pg_strncasecmp() after an exact length check; the recognized names are
 * "all", "key", "string", "numeric" and "boolean".  Anything else (a
 * non-array input, a non-string element, or an unknown name) is reported
 * with elog(ERROR).
 *
 * Returns the OR of the flags named in the array; an empty array yields 0.
 */
static uint32
parse_jsonb_index_flags(Jsonb *jb)
{
	JsonbIterator *it;
	JsonbValue  v;
	JsonbIteratorToken type;
	uint32				flags = 0;

	it = JsonbIteratorInit(&jb->root);

	type = JsonbIteratorNext(&it, &v, false);

	if (type != WJB_BEGIN_ARRAY)
		elog(ERROR, "wrong flag type");

	while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
	{
		if (v.type != jbvString)
			elog(ERROR, "text is only accepted");

		/*
		 * The compared length must equal the literal's full length;
		 * otherwise a value that merely shares a prefix with a flag name
		 * (e.g. "strinX" vs. "string") would be accepted.
		 */
		if (v.val.string.len == 3 &&
				 pg_strncasecmp(v.val.string.val, "all", 3) == 0)
			flags |= jtiAll;
		else if (v.val.string.len == 3 &&
				 pg_strncasecmp(v.val.string.val, "key", 3) == 0)
			flags |= jtiKey;
		else if (v.val.string.len == 6 &&
				 pg_strncasecmp(v.val.string.val, "string", 6) == 0)
			flags |= jtiString;
		else if (v.val.string.len == 7 &&
				 pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
			flags |= jtiNumeric;
		else if (v.val.string.len == 7 &&
				 pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
			flags |= jtiBool;
		else
			elog(ERROR, "string, numeric, boolean, key and all are only accepted");
	}

	/* The element loop must have stopped at the end of the array. */
	if (type != WJB_END_ARRAY)
		elog(ERROR, "wrong flag type");

	/*
	 * Advance the iterator once more past the end-of-array token.
	 * NOTE(review): presumably this fetches the terminating WJB_DONE so the
	 * iterator can release its state -- confirm against JsonbIteratorNext().
	 */
	JsonbIteratorNext(&it, &v, false);

	return flags;
}


/*
 * Worker function for jsonb(_all)_to_tsvector(_byid)
 *
 * Walks the jsonb document and passes to add_to_tsvector() every key,
 * string, numeric or boolean whose JsonToIndex bit is set in flags, then
 * returns the result of make_tsvector() on the accumulated ParsedText.
 * Numerics are passed in their numeric_out() text form, booleans as the
 * words "true" / "false".
 */
static TSVector
jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
{
	ParsedText	parsed;
	TSVectorBuildState build;
	JsonbValue  item;
	JsonbIterator *iter;
	JsonbIteratorToken tok;

	parsed.words = NULL;
	parsed.curwords = 0;
	build.cfgId = cfgId;
	build.prs = &parsed;

	iter = JsonbIteratorInit(&jb->root);

	/* The first token is fetched and dropped before the main loop starts. */
	(void) JsonbIteratorNext(&iter, &item, false);

	for (;;)
	{
		tok = JsonbIteratorNext(&iter, &item, false);
		if (tok == WJB_DONE)
			break;

		switch (tok)
		{
			case WJB_KEY:
				/* Object keys are only indexed on request. */
				if (flags & jtiKey)
					add_to_tsvector(&build, item.val.string.val,
									item.val.string.len);
				break;

			case WJB_VALUE:
			case WJB_ELEM:
				if (item.type == jbvString)
				{
					if (flags & jtiString)
						add_to_tsvector(&build, item.val.string.val,
										item.val.string.len);
				}
				else if (item.type == jbvNumeric)
				{
					if (flags & jtiNumeric)
					{
						Datum		asText;
						char	   *digits;

						asText = DirectFunctionCall1(numeric_out,
											NumericGetDatum(item.val.numeric));
						digits = DatumGetCString(asText);

						add_to_tsvector(&build, digits, strlen(digits));
					}
				}
				else if (item.type == jbvBool)
				{
					if (flags & jtiBool)
					{
						if (item.val.boolean)
							add_to_tsvector(&build, pstrdup("true"), 4);
						else
							add_to_tsvector(&build, pstrdup("false"), 5);
					}
				}
				/* any other value type is not indexed */
				break;

			default:
				/* container begin/end tokens carry nothing to index */
				break;
		}
	}

	return make_tsvector(&parsed);
}

Reply via email to