Hans-Juergen Schoenig -- PostgreSQL wrote:
hello,

This patch did not make it through yesterday, so I am trying to send it again.
I made a small patch which I found useful for my personal tasks.
It would be nice to see this in 8.5 -- if not in core, then maybe in contrib.
It transforms a tsvector into table format, which is really nice for text processing and comparison.

test=# SELECT * FROM tsvcontent(to_tsvector('english', 'i am pretty sure this is a good patch'));
  lex   | rank
--------+------
 good   |    8
 patch  |    9
 pretti |    3
 sure   |    4
(4 rows)

  many thanks,

     hans



--
Cybertec Schoenig & Schoenig GmbH
Reyergasse 9 / 2
A-2700 Wiener Neustadt
Web: www.postgresql-support.de

diff -dcrpN postgresql-8.4.0.old/contrib/Makefile postgresql-8.4.0/contrib/Makefile
*** postgresql-8.4.0.old/contrib/Makefile	2009-03-26 00:20:01.000000000 +0100
--- postgresql-8.4.0/contrib/Makefile	2009-06-29 11:03:04.000000000 +0200
*************** WANTED_DIRS = \
*** 39,44 ****
--- 39,45 ----
  		tablefunc	\
  		test_parser	\
  		tsearch2	\
+ 		tsvcontent	\
  		vacuumlo
  
  ifeq ($(with_openssl),yes)
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/Makefile postgresql-8.4.0/contrib/tsvcontent/Makefile
*** postgresql-8.4.0.old/contrib/tsvcontent/Makefile	1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/Makefile	2009-06-29 11:20:21.000000000 +0200
***************
*** 0 ****
--- 1,19 ----
+ # contrib/tsvcontent/Makefile
+ # Builds the tsvcontent loadable module and installs its SQL scripts.
+ MODULES = tsvcontent
+ DATA_built = tsvcontent.sql
+ DATA = uninstall_tsvcontent.sql
+ # NOTE(review): uninstall_tsvcontent.sql is listed in DATA above -- confirm the file is shipped with the module.
+ # NOTE(review): the -lm filter below looks inherited from tablefunc's Makefile -- confirm tsvcontent needs libm.
+ SHLIB_LINK += $(filter -lm, $(LIBS))
+ # Build through PGXS when USE_PGXS is defined, otherwise from inside the source tree.
+ ifdef USE_PGXS
+ PG_CONFIG = pg_config
+ PGXS := $(shell $(PG_CONFIG) --pgxs)
+ include $(PGXS)
+ else
+ subdir = contrib/tsvcontent
+ top_builddir = ../..
+ include $(top_builddir)/src/Makefile.global
+ include $(top_srcdir)/contrib/contrib-global.mk
+ endif
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.c postgresql-8.4.0/contrib/tsvcontent/tsvcontent.c
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.c	1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.c	2009-06-29 11:18:35.000000000 +0200
***************
*** 0 ****
--- 1,169 ----
+ #include "postgres.h"
+ 
+ #include "fmgr.h"
+ #include "funcapi.h"
+ #include "miscadmin.h"
+ #include "executor/spi.h"
+ #include "lib/stringinfo.h"
+ #include "nodes/nodes.h"
+ #include "utils/builtins.h"
+ #include "utils/lsyscache.h"
+ #include "utils/syscache.h"
+ #include "utils/memutils.h"
+ #include "tsearch/ts_type.h"
+ #include "tsearch/ts_utils.h"
+ #include "catalog/pg_type.h"
+ 
+ #include "tsvcontent.h"
+ 
+ PG_MODULE_MAGIC;
+ 
+ PG_FUNCTION_INFO_V1(tsvcontent);
+ 
+ /*
+  * tsvcontent(vec tsvector) RETURNS SETOF tsvcontent
+  *
+  * Expand a tsvector into (lexeme, position) rows: one row per stored position,
+  * or a single row with position 0 for a lexeme that carries no positions.
+  * An empty tsvector yields no rows.  An error is raised when the call context
+  * does not supply a composite result type.
+  *
+  * Position vectors are found with POSDATALEN()/POSDATAPTR(), which short-align
+  * the SUM of the lexeme's offset and length; aligning only the length gives a
+  * wrong address for a lexeme starting at an odd offset.  WEP_GETPOS() strips
+  * the weight bits so that weighted lexemes report their real position.
+  */
+ Datum
+ tsvcontent(PG_FUNCTION_ARGS)
+ {
+ 	FuncCallContext *funcctx;
+ 	ts_to_txt_fctx	*fctx;
+ 
+ 	/* stuff done only on the first call of the function */
+ 	if (SRF_IS_FIRSTCALL())
+ 	{
+ 		TSVector	in;
+ 		WordEntry	*entries;
+ 		char		*lexemes;
+ 		TupleDesc	tupdesc;
+ 		MemoryContext	oldcontext;
+ 		int		i, j;
+ 
+ 		/* create a function context for cross-call persistence */
+ 		funcctx = SRF_FIRSTCALL_INIT();
+ 
+ 		/*
+ 		 * Detoast the argument while still in the per-call context: every
+ 		 * byte needed later is copied below, so the detoasted datum does not
+ 		 * have to survive this call.
+ 		 */
+ 		in = PG_GETARG_TSVECTOR(0);
+ 		entries = ARRPTR(in);
+ 		lexemes = STRPTR(in);
+ 
+ 		/* everything allocated from here on must outlive this call */
+ 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ 
+ 		switch (get_call_result_type(fcinfo, NULL, &tupdesc))
+ 		{
+ 			case TYPEFUNC_COMPOSITE:
+ 				/* success */
+ 				break;
+ 			case TYPEFUNC_RECORD:
+ 				/* failed to determine actual type of RECORD */
+ 				ereport(ERROR,
+ 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ 						errmsg("function returning record called in context "
+ 								"that cannot accept type record")));
+ 				break;
+ 			default:
+ 				/* result type isn't composite */
+ 				elog(ERROR, "return type must be a row type");
+ 				break;
+ 		}
+ 
+ 		/*
+ 		 * Keep a private, blessed copy of the row descriptor; blessing is
+ 		 * required before tuples built from it are returned as Datums.
+ 		 */
+ 		funcctx->tuple_desc = BlessTupleDesc(CreateTupleDescCopy(tupdesc));
+ 
+ 		fctx = (ts_to_txt_fctx *) palloc(sizeof(ts_to_txt_fctx));
+ 
+ 		/* pass 1: count the rows to be returned */
+ 		fctx->n_tsvt = 0;
+ 		for (i = 0; i < in->size; i++)
+ 		{
+ 			if (entries[i].haspos)
+ 				fctx->n_tsvt += POSDATALEN(in, &entries[i]);
+ 			else
+ 				fctx->n_tsvt++;
+ 		}
+ 
+ 		fctx->tsvt = (tsvec_tuple *) palloc(fctx->n_tsvt * sizeof(tsvec_tuple));
+ 
+ 		/* pass 2: fill in one array element per row */
+ 		for (i = 0, j = 0; i < in->size; i++)
+ 		{
+ 			WordEntry	*entry = &entries[i];
+ 			char		*txt;
+ 
+ 			/*
+ 			 * Lexemes are not NUL-terminated inside a tsvector.  Make one
+ 			 * terminated copy per lexeme and share it among all of its rows.
+ 			 */
+ 			txt = (char *) palloc(entry->len + 1);
+ 			memcpy(txt, lexemes + entry->pos, entry->len);
+ 			txt[entry->len] = '\0';
+ 
+ 			if (entry->haspos)
+ 			{
+ 				WordEntryPos	*posv = POSDATAPTR(in, entry);
+ 				int		npos = POSDATALEN(in, entry);
+ 				int		k;
+ 
+ 				for (k = 0; k < npos; k++, j++)
+ 				{
+ 					fctx->tsvt[j].txt = txt;
+ 					fctx->tsvt[j].pos = WEP_GETPOS(posv[k]);
+ 				}
+ 			}
+ 			else
+ 			{
+ 				/* no position information stored: report position 0 */
+ 				fctx->tsvt[j].txt = txt;
+ 				fctx->tsvt[j].pos = 0;
+ 				j++;
+ 			}
+ 		}
+ 
+ 		/* total number of tuples to be returned */
+ 		funcctx->max_calls = fctx->n_tsvt;
+ 		funcctx->user_fctx = fctx;
+ 
+ 		MemoryContextSwitchTo(oldcontext);
+ 	}
+ 
+ 	funcctx = SRF_PERCALL_SETUP();
+ 	fctx = (ts_to_txt_fctx *) funcctx->user_fctx;
+ 
+ 	/* are there any rows left to send? */
+ 	if (funcctx->call_cntr < funcctx->max_calls)
+ 	{
+ 		tsvec_tuple	*row = &fctx->tsvt[funcctx->call_cntr];
+ 		Datum		result[2];
+ 		bool		isnull[2] = { false, false };
+ 		HeapTuple	tuple;
+ 
+ 		result[0] = DirectFunctionCall1(textin, CStringGetDatum(row->txt));
+ 		result[1] = Int32GetDatum(row->pos);
+ 		tuple = heap_form_tuple(funcctx->tuple_desc, result, isnull);
+ 
+ 		/* send the result */
+ 		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ 	}
+ 
+ 	/* nothing left: end the result set */
+ 	SRF_RETURN_DONE(funcctx);
+ }
+ 
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.h postgresql-8.4.0/contrib/tsvcontent/tsvcontent.h
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.h	1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.h	2009-06-29 11:18:13.000000000 +0200
***************
*** 0 ****
--- 1,13 ----
+ typedef struct			/* one output row produced by tsvcontent() */
+ {
+ 	char	*txt;		/* NUL-terminated copy of the lexeme */
+ 	int	pos;		/* value taken from the lexeme's position vector; 0 when it has none */
+ } tsvec_tuple;
+ 
+ typedef struct			/* cross-call state that tsvcontent() keeps in user_fctx */
+ {
+ 	int		n_tsvt;		/* number of elements in tsvt, i.e. rows to return */
+ 	tsvec_tuple	*tsvt;		/* palloc'd array holding every output row */
+ } ts_to_txt_fctx;
+ 
+ extern Datum tsvcontent(PG_FUNCTION_ARGS);
diff -dcrpN postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.sql.in postgresql-8.4.0/contrib/tsvcontent/tsvcontent.sql.in
*** postgresql-8.4.0.old/contrib/tsvcontent/tsvcontent.sql.in	1970-01-01 01:00:00.000000000 +0100
--- postgresql-8.4.0/contrib/tsvcontent/tsvcontent.sql.in	2009-06-29 11:19:04.000000000 +0200
***************
*** 0 ****
--- 1,6 ----
+ CREATE TYPE tsvcontent AS (lex text, rank integer);
+ 
+ -- List each lexeme of a tsvector with its occurrences; "rank" holds the position (0 if none is stored).
+ CREATE OR REPLACE FUNCTION tsvcontent(vec tsvector) RETURNS SETOF tsvcontent
+ 	AS '$libdir/tsvcontent', 'tsvcontent'
+ 	LANGUAGE C IMMUTABLE STRICT;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to