Here's a patch implementing custom parsers for data types mentioned in
http://archives.postgresql.org/pgsql-hackers/2010-12/msg01991.php. It's
an incremental patch on top of the plpython-refactor patch sent earlier.

Git branch for this patch:
https://github.com/wulczer/postgres/tree/custom-parsers.

The idea has been discussed in
http://archives.postgresql.org/pgsql-hackers/2010-12/msg01307.php.

With that patch, when built with --with-python, the hstore module
includes code that adds a GUC called plpython.hstore.

This GUC should be set to the full name of the hstore datatype, for
instance plpython.hstore = 'public.hstore'.

If it is set, the datatype's OID is looked up and hstore sets up a
rendezvous variable called plpython_<OID>_parsers that points to a pair of
functions that can convert an hstore Datum to a PyObject and back.

PL/Python, on the other hand, when it sees an argument with an unknown
type, tries to look up a rendezvous variable using the type's OID, and if
it finds one, it uses the parser functions pointed at by that variable.

Long story short, it works like this:

LOAD 'hstore';
SET plpython.hstore = 'public.hstore';
CREATE FUNCTION pick_one(h hstore, key text) RETURNS hstore AS $$
return {key: h[key]}
$$ LANGUAGE plpythonu;
SELECT pick_one('a=>3,b=>4', 'b');
-- gives back an hstore 'b=>4'

There's some ugliness with how hstore's Makefile handles building it,
and I'm not sure what's needed to make it work with the Windows build
system. Also, documentation is missing. It's already usable, but if we
decide to commit that, I'll probably need some help with Windows and docs.

I first tried to make hstore generate a separate .so with that
functionality if --with-python was specified, but couldn't convince the
Makefile to do that. So if you configure the tree with --with-python,
hstore will link to libpython, maybe that's OK?

Cheers,
Jan

PS: of course, once committed we can add custom parsers for isbn,
citext, uuids, cubes, and other weird things.

J
diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile
index e466b6f..dbeeb89 100644
*** a/contrib/hstore/Makefile
--- b/contrib/hstore/Makefile
*************** top_builddir = ../..
*** 5,12 ****
  include $(top_builddir)/src/Makefile.global
  
  MODULE_big = hstore
  OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \
! 	crc32.o
  
  DATA_built = hstore.sql
  DATA = uninstall_hstore.sql
--- 5,21 ----
  include $(top_builddir)/src/Makefile.global
  
  MODULE_big = hstore
+ 
  OBJS = hstore_io.o hstore_op.o hstore_gist.o hstore_gin.o hstore_compat.o \
! 	hstore_plpython.o crc32.o
! 
! ifeq ($(with_python),yes)
! 
! PG_CPPFLAGS := -I$(srcdir) -I$(top_builddir)/src/pl/plpython \
! 			$(python_includespec) -DHSTORE_PLPYTHON_SUPPORT
! SHLIB_LINK = $(python_libspec) $(python_additional_libs) \
! 		$(filter -lintl,$(LIBS)) $(CPPFLAGS)
! endif
  
  DATA_built = hstore.sql
  DATA = uninstall_hstore.sql
diff --git a/contrib/hstore/hstore.h b/contrib/hstore/hstore.h
index 8906397..6edfc70 100644
*** a/contrib/hstore/hstore.h
--- b/contrib/hstore/hstore.h
*************** extern Pairs *hstoreArrayToPairs(ArrayTy
*** 174,179 ****
--- 174,182 ----
  #define HStoreExistsAllStrategyNumber	11
  #define HStoreOldContainsStrategyNumber 13		/* backwards compatibility */
  
+ /* PL/Python support */
+ extern void hstore_plpython_init(void);
+ 
  /*
   * defining HSTORE_POLLUTE_NAMESPACE=0 will prevent use of old function names;
   * for now, we default to on for the benefit of people restoring old dumps
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index 0d6f0b6..92c8db9 100644
*** a/contrib/hstore/hstore_io.c
--- b/contrib/hstore/hstore_io.c
*************** PG_MODULE_MAGIC;
*** 20,25 ****
--- 20,26 ----
  /* old names for C functions */
  HSTORE_POLLUTE(hstore_from_text, tconvert);
  
+ void _PG_init(void);
  
  typedef struct
  {
*************** hstore_send(PG_FUNCTION_ARGS)
*** 1211,1213 ****
--- 1212,1220 ----
  
  	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
  }
+ 
+ /*
+  * _PG_init
+  *		Hand off to hstore_plpython_init(), which registers the PL/Python
+  *		integration when hstore is built with HSTORE_PLPYTHON_SUPPORT and is
+  *		an empty stub otherwise.
+  */
+ void
+ _PG_init(void)
+ {
+ 	hstore_plpython_init();
+ }
diff --git a/contrib/hstore/hstore_plpython.c b/contrib/hstore/hstore_plpython.c
index ...081a33e .
*** a/contrib/hstore/hstore_plpython.c
--- b/contrib/hstore/hstore_plpython.c
***************
*** 0 ****
--- 1,249 ----
+ /*
+  * contrib/hstore/hstore_plpython.c
+  *
+  * bidirectional transformation between hstores and Python dictionary objects
+  */
+ 
+ /* Only build if PL/Python support is needed */
+ #if defined(HSTORE_PLPYTHON_SUPPORT)
+ 
+ #if defined(_MSC_VER) && defined(_DEBUG)
+ /* Python uses #pragma to bring in a non-default libpython on VC++ if
+  * _DEBUG is defined */
+ #undef _DEBUG
+ /* Also hide away errcode, since we load Python.h before postgres.h */
+ #define errcode __msvc_errcode
+ #include <Python.h>
+ #undef errcode
+ #define _DEBUG
+ #elif defined (_MSC_VER)
+ #define errcode __msvc_errcode
+ #include <Python.h>
+ #undef errcode
+ #else
+ #include <Python.h>
+ #endif
+ 
+ #include "postgres.h"
+ #include "utils/guc.h"
+ #include "utils/builtins.h"
+ #include "utils/syscache.h"
+ #include "catalog/namespace.h"
+ 
+ #include "plpython.h"
+ #include "hstore.h"
+ 
+ static Oid get_hstore_oid(const char *name);
+ static void set_hstore_parsers(Oid);
+ 
+ static PyObject *hstore_to_dict(void *, Datum);
+ static Datum dict_to_hstore(void *, int32, PyObject *);
+ 
+ /* GUC variables */
+ 
+ static char *hstore_name;
+ 
+ /* Previous hstore OID */
+ 
+ static Oid previous;
+ 
+ /*
+  * Conversion callbacks handed to PL/Python.  set_hstore_parsers() publishes
+  * the address of this structure through a rendezvous variable, so nothing
+  * needs to reach it by symbol name; keep it file-local so that the very
+  * generic name "parsers" does not end up in the global symbol namespace.
+  */
+ static PLyParsers parsers = {
+ 	.in = hstore_to_dict,
+ 	.out = dict_to_hstore
+ };
+ 
+ /*
+  * hstore_to_dict
+  *		Convert an hstore Datum into a newly created Python dictionary.
+  *
+  * Every key becomes a Python string.  Every value becomes a Python string,
+  * or None when the hstore value is NULL.  The first argument is not used.
+  *
+  * Returns a new reference to the dictionary, or NULL (with the Python
+  * exception left set) if a Python object could not be created or inserted.
+  */
+ static PyObject *
+ hstore_to_dict(void *ignored, Datum d)
+ {
+ 	HStore	   *hstore = DatumGetHStoreP(d);
+ 	char	   *base = STRPTR(hstore);
+ 	HEntry	   *entries = ARRPTR(hstore);
+ 	int			count = HS_COUNT(hstore);
+ 	int			i;
+ 	PyObject   *ret;
+ 
+ 	ret = PyDict_New();
+ 	if (ret == NULL)
+ 		return NULL;
+ 
+ 	for (i = 0; i < count; i++)
+ 	{
+ 		PyObject   *key;
+ 		PyObject   *val;
+ 		int			rc;
+ 
+ 		key = PyString_FromStringAndSize(HS_KEY(entries, base, i),
+ 										 HS_KEYLEN(entries, i));
+ 
+ 		if (HS_VALISNULL(entries, i))
+ 		{
+ 			Py_INCREF(Py_None);
+ 			val = Py_None;
+ 		}
+ 		else
+ 		{
+ 			val = PyString_FromStringAndSize(HS_VAL(entries, base, i),
+ 											 HS_VALLEN(entries, i));
+ 		}
+ 
+ 		/* only insert if both objects were actually created */
+ 		if (key != NULL && val != NULL)
+ 			rc = PyDict_SetItem(ret, key, val);
+ 		else
+ 			rc = -1;
+ 
+ 		/*
+ 		 * PyDict_SetItem takes its own references rather than stealing
+ 		 * ours, so drop the ones we hold or they leak on every call.
+ 		 */
+ 		Py_XDECREF(key);
+ 		Py_XDECREF(val);
+ 
+ 		if (rc < 0)
+ 		{
+ 			Py_DECREF(ret);
+ 			return NULL;
+ 		}
+ 	}
+ 
+ 	return ret;
+ }
+ 
+ /*
+  * dict_to_hstore
+  *		Build an hstore Datum from a Python dictionary.
+  *
+  * Keys must be Python strings; values must be Python strings or None (None
+  * is stored as an hstore NULL).  Neither keys nor values may contain NUL
+  * bytes, because they are copied as C strings.  The first argument and the
+  * typmod are not used.
+  *
+  * Raises an ERROR if the object is not a dictionary or if any key or value
+  * violates the rules above.
+  */
+ static Datum
+ dict_to_hstore(void *ignored, int32 typmod, PyObject *dict)
+ {
+ 	HStore	   *hstore;
+ 	Pairs	   *pairs;
+ 	PyObject   *key;
+ 	PyObject   *value;
+ 	Py_ssize_t	pos = 0;
+ 	int			pcount;
+ 	int			buflen;
+ 	int			i = 0;
+ 
+ 	if (!PyDict_Check(dict))
+ 		ereport(ERROR,
+ 				(errmsg("hstores can only be constructed "
+ 						"from Python dictionaries")));
+ 
+ 	pcount = PyDict_Size(dict);
+ 	pairs = palloc(pcount * sizeof(Pairs));
+ 
+ 	/* loop over the dictionary, creating a Pair for each key/value pair */
+ 	while (PyDict_Next(dict, &pos, &key, &value))
+ 	{
+ 		char	   *keys;
+ 		Py_ssize_t	keylen;		/* must be Py_ssize_t: the API writes one */
+ 
+ 		if (!PyString_Check(key))
+ 			elog(ERROR, "hstore keys have to be strings");
+ 
+ 		if (PyString_AsStringAndSize(key, &keys, &keylen) < 0)
+ 			elog(ERROR, "could not extract hstore key from Python string");
+ 
+ 		/* an embedded NUL would silently truncate the pstrdup'd copy */
+ 		if ((Py_ssize_t) strlen(keys) != keylen)
+ 			elog(ERROR, "hstore keys cannot contain NUL bytes");
+ 
+ 		pairs[i].key = pstrdup(keys);
+ 		pairs[i].keylen = hstoreCheckKeyLen(keylen);
+ 		pairs[i].needfree = true;
+ 
+ 		if (value == Py_None)
+ 		{
+ 			pairs[i].val = NULL;
+ 			pairs[i].vallen = 0;
+ 			pairs[i].isnull = true;
+ 		}
+ 		else
+ 		{
+ 			char	   *vals;
+ 			Py_ssize_t	vallen;
+ 
+ 			if (!PyString_Check(value))
+ 				elog(ERROR, "hstore values have to be strings");
+ 
+ 			if (PyString_AsStringAndSize(value, &vals, &vallen) < 0)
+ 				elog(ERROR, "could not extract hstore value from Python string");
+ 
+ 			if ((Py_ssize_t) strlen(vals) != vallen)
+ 				elog(ERROR, "hstore values cannot contain NUL bytes");
+ 
+ 			pairs[i].val = pstrdup(vals);
+ 			pairs[i].vallen = hstoreCheckValLen(vallen);
+ 			pairs[i].isnull = false;
+ 		}
+ 
+ 		i++;
+ 	}
+ 
+ 	pcount = hstoreUniquePairs(pairs, pcount, &buflen);
+ 	hstore = hstorePairs(pairs, pcount, buflen);
+ 
+ 	return PointerGetDatum(hstore);
+ }
+ 
+ /*
+  * recheck_hstore_oid
+  *		Hook registered for the plpython.hstore setting.
+  *
+  * Resolves the proposed type name to an OID.  A NULL value, or a non-empty
+  * name that does not resolve to a valid OID, is rejected by returning NULL.
+  * Otherwise the value is accepted and returned unchanged; when "doit" is
+  * true the parsers are (re)registered for the resolved OID, which for an
+  * empty name is InvalidOid.  "source" is not consulted.
+  */
+ static const char *
+ recheck_hstore_oid(const char *newvalue, bool doit, GucSource source)
+ {
+ 	Oid			typoid;
+ 
+ 	if (newvalue == NULL)
+ 		return NULL;
+ 
+ 	typoid = get_hstore_oid(newvalue);
+ 
+ 	/* accept either the empty string or a name that resolved to a type */
+ 	if (newvalue[0] == '\0' || OidIsValid(typoid))
+ 	{
+ 		if (doit)
+ 			set_hstore_parsers(typoid);
+ 
+ 		return newvalue;
+ 	}
+ 
+ 	return NULL;
+ }
+ 
+ /*
+  * hstore_plpython_init
+  *		Register the plpython.hstore setting and, if a type name is already
+  *		present in it, install the hstore parsers for that type.
+  *
+  * Called from hstore's _PG_init().
+  */
+ void
+ hstore_plpython_init(void)
+ {
+ 	/* recheck_hstore_oid is passed as the hook for this variable */
+ 	DefineCustomStringVariable("plpython.hstore",
+ 	  "The fully qualified name of the hstore type.",
+ 							   NULL,
+ 							   &hstore_name,
+ 							   NULL,
+ 							   PGC_SUSET,
+ 							   0,
+ 							   recheck_hstore_oid,
+ 							   NULL);
+ 
+ 	EmitWarningsOnPlaceholders("plpython");
+ 
+ 	/* forget any previously registered OID before the explicit recheck */
+ 	previous = InvalidOid;
+ 
+ 	/* a name is already configured: resolve it and register the parsers */
+ 	if (hstore_name && *hstore_name)
+ 		recheck_hstore_oid(hstore_name, true, PGC_S_FILE);
+ }
+ 
+ /*
+  * get_hstore_oid
+  *		Resolve a (possibly schema-qualified) type name to a type OID.
+  *
+  * "name" must not be NULL.  An empty name yields InvalidOid without any
+  * catalog access.  Otherwise the name is split into its qualified parts,
+  * the namespace is determined with QualifiedNameGetCreationNamespace(), and
+  * the type is looked up in the TYPENAMENSP syscache.  The result of that
+  * lookup is returned as-is (presumably InvalidOid when no such type exists;
+  * the caller checks it with OidIsValid).
+  */
+ static Oid
+ get_hstore_oid(const char *name)
+ {
+ 	text	   *text_name;
+ 	List	   *qualified_name;		/* not "hstore_name": that would shadow
+ 									 * the file-level GUC variable */
+ 	char	   *type_name;
+ 	Oid			type_namespace;
+ 
+ 	Assert(name != NULL);
+ 
+ 	if (name[0] == '\0')
+ 		return InvalidOid;
+ 
+ 	text_name = cstring_to_text(name);
+ 	qualified_name = textToQualifiedNameList(text_name);
+ 	pfree(text_name);
+ 
+ 	type_namespace = QualifiedNameGetCreationNamespace(qualified_name,
+ 													   &type_name);
+ 
+ 	return GetSysCacheOid2(TYPENAMENSP,
+ 						   CStringGetDatum(type_name),
+ 						   ObjectIdGetDatum(type_namespace));
+ }
+ 
+ /*
+  * set_hstore_parsers
+  *		Point the rendezvous variable for "hstore_oid" at our parsers.
+  *
+  * If parsers were registered earlier under a different OID, that older
+  * rendezvous variable is reset to NULL first.  Passing InvalidOid only
+  * unregisters.  The OID that is now registered (or InvalidOid) is remembered
+  * in "previous", so a later call never acts on a stale OID.
+  */
+ static void
+ set_hstore_parsers(Oid hstore_oid)
+ {
+ 	char		name[NAMEDATALEN];
+ 
+ 	if (OidIsValid(previous))
+ 	{
+ 		snprintf(name, sizeof(name), PARSERS_VARIABLE_PATTERN, previous);
+ 		*find_rendezvous_variable(name) = NULL;
+ 	}
+ 
+ 	if (OidIsValid(hstore_oid))
+ 	{
+ 		snprintf(name, sizeof(name), PARSERS_VARIABLE_PATTERN, hstore_oid);
+ 		*find_rendezvous_variable(name) = &parsers;
+ 	}
+ 
+ 	/* record the new state even when the registration was just removed */
+ 	previous = hstore_oid;
+ }
+ 
+ #else	/* !defined(HSTORE_PLPYTHON_SUPPORT) */
+ 
+ /* No-op stand-in used when HSTORE_PLPYTHON_SUPPORT is not defined. */
+ void
+ hstore_plpython_init(void)
+ {
+ }
+ 
+ #endif	/* defined(HSTORE_PLPYTHON_SUPPORT) */
diff --git a/src/pl/plpython/plpython.c b/src/pl/plpython/plpython.c
index 67eb0f3..a4d3528 100644
*** a/src/pl/plpython/plpython.c
--- b/src/pl/plpython/plpython.c
*************** typedef int Py_ssize_t;
*** 90,95 ****
--- 90,97 ----
  #include <fcntl.h>
  
  /* postgreSQL stuff */
+ #include "plpython.h"
+ 
  #include "catalog/pg_proc.h"
  #include "catalog/pg_type.h"
  #include "commands/trigger.h"
*************** static PyObject *PLyList_FromArray(PLyDa
*** 347,352 ****
--- 349,357 ----
  
  static PyObject *PLyDict_FromTuple(PLyTypeInfo *, HeapTuple, TupleDesc);
  
+ static PLyParserIn PLy_get_custom_input_function(Oid oid);
+ static PLyParserOut PLy_get_custom_output_function(Oid oid);
+ 
  static Datum PLyObject_ToBool(PLyObToDatum *, int32, PyObject *);
  static Datum PLyObject_ToBytea(PLyObToDatum *, int32, PyObject *);
  static Datum PLyObject_ToDatum(PLyObToDatum *, int32, PyObject *);
*************** PLy_output_datum_func2(PLyObToDatum *arg
*** 1789,1794 ****
--- 1794,1800 ----
  {
  	Form_pg_type typeStruct = (Form_pg_type) GETSTRUCT(typeTup);
  	Oid			element_type;
+ 	Oid			argument_type;
  
  	perm_fmgr_info(typeStruct->typinput, &arg->typfunc);
  	arg->typoid = HeapTupleGetOid(typeTup);
*************** PLy_output_datum_func2(PLyObToDatum *arg
*** 1796,1807 ****
  	arg->typbyval = typeStruct->typbyval;
  
  	element_type = get_element_type(arg->typoid);
  
  	/*
  	 * Select a conversion function to convert Python objects to PostgreSQL
  	 * datums.	Most data types can go through the generic function.
  	 */
! 	switch (getBaseType(element_type ? element_type : arg->typoid))
  	{
  		case BOOLOID:
  			arg->func = PLyObject_ToBool;
--- 1802,1814 ----
  	arg->typbyval = typeStruct->typbyval;
  
  	element_type = get_element_type(arg->typoid);
+ 	argument_type = getBaseType(element_type ? element_type : arg->typoid);
  
  	/*
  	 * Select a conversion function to convert Python objects to PostgreSQL
  	 * datums.	Most data types can go through the generic function.
  	 */
! 	switch (argument_type)
  	{
  		case BOOLOID:
  			arg->func = PLyObject_ToBool;
*************** PLy_output_datum_func2(PLyObToDatum *arg
*** 1810,1816 ****
  			arg->func = PLyObject_ToBytea;
  			break;
  		default:
! 			arg->func = PLyObject_ToDatum;
  			break;
  	}
  
--- 1817,1829 ----
  			arg->func = PLyObject_ToBytea;
  			break;
  		default:
! 			/* Last ditch effort of finding a rendezvous variable pointing to
! 			 * a parser function, useful for extension modules plugging in
! 			 * their own parsers
! 			 */
! 			arg->func = (PLyObToDatumFunc) PLy_get_custom_output_function(argument_type);
! 			if (arg->func == NULL)
! 				arg->func = PLyObject_ToDatum;
  			break;
  	}
  
*************** PLy_input_datum_func2(PLyDatumToOb *arg,
*** 1852,1857 ****
--- 1865,1871 ----
  {
  	Form_pg_type typeStruct = (Form_pg_type) GETSTRUCT(typeTup);
  	Oid			element_type = get_element_type(typeOid);
+ 	Oid			argument_type;
  
  	/* Get the type's conversion information */
  	perm_fmgr_info(typeStruct->typoutput, &arg->typfunc);
*************** PLy_input_datum_func2(PLyDatumToOb *arg,
*** 1861,1868 ****
  	arg->typlen = typeStruct->typlen;
  	arg->typalign = typeStruct->typalign;
  
  	/* Determine which kind of Python object we will convert to */
! 	switch (getBaseType(element_type ? element_type : typeOid))
  	{
  		case BOOLOID:
  			arg->func = PLyBool_FromBool;
--- 1875,1884 ----
  	arg->typlen = typeStruct->typlen;
  	arg->typalign = typeStruct->typalign;
  
+ 	argument_type = getBaseType(element_type ? element_type : typeOid);
+ 
  	/* Determine which kind of Python object we will convert to */
! 	switch (argument_type)
  	{
  		case BOOLOID:
  			arg->func = PLyBool_FromBool;
*************** PLy_input_datum_func2(PLyDatumToOb *arg,
*** 1889,1895 ****
  			arg->func = PLyBytes_FromBytea;
  			break;
  		default:
! 			arg->func = PLyString_FromDatum;
  			break;
  	}
  
--- 1905,1917 ----
  			arg->func = PLyBytes_FromBytea;
  			break;
  		default:
! 			/* Last ditch effort of finding a rendezvous variable pointing to
! 			 * a parser function, useful for extension modules plugging in
! 			 * their own parsers
! 			 */
! 			arg->func = (PLyDatumToObFunc) PLy_get_custom_input_function(argument_type);
! 			if (arg->func == NULL)
! 				arg->func = PLyString_FromDatum;
  			break;
  	}
  
*************** PLy_typeinfo_dealloc(PLyTypeInfo *arg)
*** 1930,1935 ****
--- 1952,1991 ----
  	}
  }
  
+ /*
+  * Getting the parser functions from a rendezvous variable set by another
+  * extension.
+  *
+  * PLy_find_custom_parsers builds the rendezvous variable name for the given
+  * type OID from PARSERS_VARIABLE_PATTERN and returns whatever that variable
+  * currently points at, which is NULL when nothing has been stored in it.
+  */
+ static PLyParsers *
+ PLy_find_custom_parsers(Oid oid)
+ {
+ 	char		name[NAMEDATALEN];
+ 
+ 	snprintf(name, sizeof(name), PARSERS_VARIABLE_PATTERN, oid);
+ 
+ 	return (PLyParsers *) *find_rendezvous_variable(name);
+ }
+ 
+ /*
+  * Return the custom Datum -> PyObject conversion function registered for
+  * the given type OID, or NULL if there is none.
+  */
+ static PLyParserIn
+ PLy_get_custom_input_function(Oid oid)
+ {
+ 	PLyParsers *parsers = PLy_find_custom_parsers(oid);
+ 
+ 	return (parsers != NULL) ? parsers->in : NULL;
+ }
+ 
+ /*
+  * Return the custom PyObject -> Datum conversion function registered for
+  * the given type OID, or NULL if there is none.
+  */
+ static PLyParserOut
+ PLy_get_custom_output_function(Oid oid)
+ {
+ 	PLyParsers *parsers = PLy_find_custom_parsers(oid);
+ 
+ 	return (parsers != NULL) ? parsers->out : NULL;
+ }
+ 
  static PyObject *
  PLyBool_FromBool(PLyDatumToOb *arg, Datum d)
  {
diff --git a/src/pl/plpython/plpython.h b/src/pl/plpython/plpython.h
index ...53d25b7 .
*** a/src/pl/plpython/plpython.h
--- b/src/pl/plpython/plpython.h
***************
*** 0 ****
--- 1,40 ----
+ /*
+  * src/pl/plpython/plpython.h
+  */
+ #ifndef PLPYTHON_H
+ #define PLPYTHON_H
+ 
+ /*
+  * This header includes nothing itself: PyObject, Datum and int32 must
+  * already be declared by whoever includes it.
+  */
+ 
+ /*
+  * Rendezvous variable pattern for parsers exported from other extensions
+  *
+  * An extension providing parsers for type X should look up the type's OID and
+  * set a rendezvous variable using this pattern that points to a PLyParsers
+  * structure. PL/Python will then use these parsers for arguments with that
+  * OID.
+  */
+ #define PARSERS_VARIABLE_PATTERN "plpython_%u_parsers"
+ 
+ /*
+  * Types for parser functions that other modules can export to transform
+  * Datums into PyObjects and back. The types need to be compatible with
+  * PLyObToDatumFunc and PLyDatumToObFunc, but we don't want to expose too much
+  * of plpython.c's guts here, so the first argument is mandated to be a void
+  * pointer that should not be touched. An extension should know exactly what
+  * it's dealing with, so there's no need for it to look at anything contained
+  * in PLyTypeInfo, which is what gets passed here.
+  *
+  * The output parser also gets the type's typmod, which might actually be
+  * useful.
+  */
+ typedef PyObject *(*PLyParserIn) (void *, Datum);
+ typedef Datum (*PLyParserOut) (void *, int32, PyObject *);
+ 
+ /*
+  * The pair of conversion functions an extension publishes for one type.
+  */
+ typedef struct PLyParsers
+ {
+ 	PLyParserIn			in;		/* Datum -> PyObject */
+ 	PLyParserOut		out;	/* PyObject (plus typmod) -> Datum */
+ } PLyParsers;
+ 
+ #endif   /* PLPYTHON_H */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to