On 13/07/12 13:38, Jan Urbański wrote:
On 12/07/12 11:08, Heikki Linnakangas wrote:
On 07.07.2012 00:12, Jan Urbański wrote:
So you're in favour of doing unicode -> bytes by encoding with UTF-8 and
then using the server's encoding functions?

Sounds reasonable to me. The extra conversion between UTF-8 and UCS-2
should be quite fast, and it would be good to be consistent in the way
we do conversions in both directions.


I'll implement that then (sorry for not following up on that earlier).

Here's a patch that always encodes Python unicode objects using UTF-8 and then uses Postgres's internal functions to produce bytes in the server encoding.

Cheers,
Jan
diff --git a/src/pl/plpython/plpy_elog.c b/src/pl/plpython/plpy_elog.c
new file mode 100644
index c375ac0..c2b3cb8
*** a/src/pl/plpython/plpy_elog.c
--- b/src/pl/plpython/plpy_elog.c
*************** static char *get_source_line(const char
*** 28,33 ****
--- 28,41 ----
  
  
  /*
+  * Guard against re-entrant calls to PLy_traceback, which can happen if
+  * traceback formatting functions raise Python errors.
+  */
+ #define TRACEBACK_RECURSION_LIMIT 2
+ static int	recursion_depth = 0;
+ 
+ 
+ /*
   * Emit a PG error or notice, together with any available info about
   * the current Python error, previously set by PLy_exception_set().
   * This should be used to propagate Python errors into PG.	If fmt is
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 147,166 ****
  	StringInfoData xstr;
  	StringInfoData tbstr;
  
  	/*
  	 * get the current exception
  	 */
  	PyErr_Fetch(&e, &v, &tb);
  
  	/*
! 	 * oops, no exception, return
  	 */
! 	if (e == NULL)
  	{
  		*xmsg = NULL;
  		*tbmsg = NULL;
  		*tb_depth = 0;
  
  		return;
  	}
  
--- 155,177 ----
  	StringInfoData xstr;
  	StringInfoData tbstr;
  
+ 	recursion_depth++;
+ 
  	/*
  	 * get the current exception
  	 */
  	PyErr_Fetch(&e, &v, &tb);
  
  	/*
! 	 * oops, no exception or recursion depth exceeded, return
  	 */
! 	if (e == NULL || recursion_depth > TRACEBACK_RECURSION_LIMIT)
  	{
  		*xmsg = NULL;
  		*tbmsg = NULL;
  		*tb_depth = 0;
  
+ 		recursion_depth--;
  		return;
  	}
  
*************** PLy_traceback(char **xmsg, char **tbmsg,
*** 326,331 ****
--- 337,344 ----
  		(*tb_depth)++;
  	}
  
+ 	recursion_depth--;
+ 
  	/* Return the traceback. */
  	*tbmsg = tbstr.data;
  
diff --git a/src/pl/plpython/plpy_util.c b/src/pl/plpython/plpy_util.c
new file mode 100644
index 4aabafc..fe43312
*** a/src/pl/plpython/plpy_util.c
--- b/src/pl/plpython/plpy_util.c
*************** PLy_free(void *ptr)
*** 61,126 ****
  PyObject *
  PLyUnicode_Bytes(PyObject *unicode)
  {
! 	PyObject   *rv;
! 	const char *serverenc;
  
! 	/*
! 	 * Map PostgreSQL encoding to a Python encoding name.
! 	 */
! 	switch (GetDatabaseEncoding())
  	{
! 		case PG_SQL_ASCII:
! 			/*
! 			 * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
! 			 * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
! 			 * SQL_ASCII means that anything is allowed, and the system doesn't
! 			 * try to interpret the bytes in any way. But not sure what else
! 			 * to do, and we haven't heard any complaints...
! 			 */
! 			serverenc = "ascii";
! 			break;
! 		case PG_WIN1250:
! 			serverenc = "cp1250";
! 			break;
! 		case PG_WIN1251:
! 			serverenc = "cp1251";
! 			break;
! 		case PG_WIN1252:
! 			serverenc = "cp1252";
! 			break;
! 		case PG_WIN1253:
! 			serverenc = "cp1253";
! 			break;
! 		case PG_WIN1254:
! 			serverenc = "cp1254";
! 			break;
! 		case PG_WIN1255:
! 			serverenc = "cp1255";
! 			break;
! 		case PG_WIN1256:
! 			serverenc = "cp1256";
! 			break;
! 		case PG_WIN1257:
! 			serverenc = "cp1257";
! 			break;
! 		case PG_WIN1258:
! 			serverenc = "cp1258";
! 			break;
! 		case PG_WIN866:
! 			serverenc = "cp866";
! 			break;
! 		case PG_WIN874:
! 			serverenc = "cp874";
! 			break;
! 		default:
! 			/* Other encodings have the same name in Python. */
! 			serverenc = GetDatabaseEncodingName();
! 			break;
  	}
  
! 	rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
! 	if (rv == NULL)
! 		PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
  	return rv;
  }
  
--- 61,96 ----
  PyObject *
  PLyUnicode_Bytes(PyObject *unicode)
  {
! 	PyObject	*bytes, *rv;
! 	char		*utf8string, *encoded;
  
! 	/* first encode the Python unicode object with UTF-8 */
! 	bytes = PyUnicode_AsUTF8String(unicode);
! 	if (bytes == NULL)
! 		PLy_elog(ERROR, "could not convert Python Unicode object to bytes");
! 
! 	utf8string = PyBytes_AsString(bytes);
! 	if (utf8string == NULL) {
! 		Py_DECREF(bytes);
! 		PLy_elog(ERROR, "could not extract bytes from encoded string");
! 	}
! 
! 	/* then convert to server encoding */
! 	encoded = (char *) pg_do_encoding_conversion((unsigned char *) utf8string,
! 												 strlen(utf8string),
! 												 PG_UTF8,
! 												 GetDatabaseEncoding());
! 
! 	/* finally, build a bytes object in the server encoding */
! 	rv = PyBytes_FromStringAndSize(encoded, strlen(encoded));
! 
! 	/* if pg_do_encoding_conversion allocated memory, free it now */
! 	if (utf8string != encoded)
  	{
! 		pfree(encoded);
  	}
  
! 	Py_DECREF(bytes);
  	return rv;
  }
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to