On Sun, Feb 6, 2011 at 15:31, Andrew Dunstan <and...@dunslane.net> wrote:
> Force strings passed to and from plperl to be in UTF8 encoding.
>
> String are converted to UTF8 on the way into perl and to the
> database encoding on the way back. This avoids a number of
> observed anomalies, and ensures Perl a consistent view of the
> world.

So I noticed a problem while playing with this in my discussion with
David Wheeler. pg_do_encoding() does nothing when the src encoding ==
the dest encoding. That means on a UTF-8 database we fail make sure
our strings are valid utf8.

An easy way to see this is to embed a null in the middle of a string:
=> create or replace function zerob() returns text as $$ return
"abcd\0efg"; $$ language plperl;
=> SELECT zerob();
abcd

Also It seems bogus to bogus to do any encoding conversion when we are
SQL_ASCII, and its really trivial to fix.

With the attached:
- when we are on a utf8 database make sure to verify our output string
in sv2cstr (we assume database strings coming in are already valid)

- Do no string conversion when we are SQL_ASCII in or out

- add plperl_helpers.h as a dep to plperl.o in our makefile

- remove some redundant calls to pg_verify_mbstr()

- as utf_e2u only as one caller dont pstrdup() instead have the caller
check (saves some cycles and memory)
*** a/doc/src/sgml/plperl.sgml
--- b/doc/src/sgml/plperl.sgml
***************
*** 127,135 **** $$ LANGUAGE plperl;
  
    <note>
      <para>
!       Arguments will be converted from the database's encoding to UTF-8 
!       for use inside plperl, and then converted from UTF-8 back to the 
!       database encoding upon return. 
      </para>
    </note>
  
--- 127,136 ----
  
    <note>
      <para>
!       Arguments will be converted from the database's encoding to
!       UTF-8 for use inside plperl, and then converted from UTF-8 back
!       to the database encoding upon return.  Unless the database
!       encoding is SQL_ASCII, then no conversion is done.
      </para>
    </note>
  
*** a/src/pl/plperl/GNUmakefile
--- b/src/pl/plperl/GNUmakefile
***************
*** 54,60 **** PSQLDIR = $(bindir)
  
  include $(top_srcdir)/src/Makefile.shlib
  
! plperl.o: perlchunks.h plperl_opmask.h
  
  plperl_opmask.h: plperl_opmask.pl
  	$(PERL) $< $@
--- 54,60 ----
  
  include $(top_srcdir)/src/Makefile.shlib
  
! plperl.o: perlchunks.h plperl_opmask.h plperl_helpers.h
  
  plperl_opmask.h: plperl_opmask.pl
  	$(PERL) $< $@
*** a/src/pl/plperl/plperl.c
--- b/src/pl/plperl/plperl.c
***************
*** 2308,2315 **** plperl_spi_exec(char *query, int limit)
  	{
  		int			spi_rv;
  
- 		pg_verifymbstr(query, strlen(query), false);
- 
  		spi_rv = SPI_execute(query, current_call_data->prodesc->fn_readonly,
  							 limit);
  		ret_hv = plperl_spi_execute_fetch_result(SPI_tuptable, SPI_processed,
--- 2308,2313 ----
***************
*** 2555,2563 **** plperl_spi_query(char *query)
  		void	   *plan;
  		Portal		portal;
  
- 		/* Make sure the query is validly encoded */
- 		pg_verifymbstr(query, strlen(query), false);
- 
  		/* Create a cursor for the query */
  		plan = SPI_prepare(query, 0, NULL);
  		if (plan == NULL)
--- 2553,2558 ----
***************
*** 2767,2775 **** plperl_spi_prepare(char *query, int argc, SV **argv)
  			qdesc->argtypioparams[i] = typIOParam;
  		}
  
- 		/* Make sure the query is validly encoded */
- 		pg_verifymbstr(query, strlen(query), false);
- 
  		/************************************************************
  		 * Prepare the plan and check for errors
  		 ************************************************************/
--- 2762,2767 ----
*** a/src/pl/plperl/plperl_helpers.h
--- b/src/pl/plperl/plperl_helpers.h
***************
*** 20,27 **** static inline char *
  utf_e2u(const char *str)
  {
  	char *ret = (char*)pg_do_encoding_conversion((unsigned char*)str, strlen(str), GetDatabaseEncoding(), PG_UTF8);
- 	if (ret == str)
- 		ret = pstrdup(ret);
  	return ret;
  }
  
--- 20,25 ----
***************
*** 34,46 **** sv2cstr(SV *sv)
  {
  	char *val;
  	STRLEN len;
  
  	/*
! 	 * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
  	 */
  	val = SvPVutf8(sv, len);
  
  	/*
  	 * we use perls length in the event we had an embedded null byte to ensure
  	 * we error out properly
  	 */
--- 32,59 ----
  {
  	char *val;
  	STRLEN len;
+ 	int enc = GetDatabaseEncoding();
  
  	/*
! 	 * we dont do any conversion when SQL_ASCII
  	 */
+ 	if (enc == PG_SQL_ASCII)
+ 	{
+ 		val = SvPV(sv, len);
+ 		return pstrdup(val);
+ 	}
+ 
  	val = SvPVutf8(sv, len);
  
  	/*
+ 	 * when the src encoding matches the dest encoding pg_do_encoding will not
+ 	 * do any verification. SvPVutf8() may return invalid utf8 so we need to do
+ 	 * that ourselves
+ 	 */
+ 	if (enc == PG_UTF8)
+ 		pg_verifymbstr(val, len, false);
+ 
+ 	/*
  	 * we use perls length in the event we had an embedded null byte to ensure
  	 * we error out properly
  	 */
***************
*** 56,67 **** static inline SV *
  cstr2sv(const char *str)
  {
  	SV *sv;
! 	char *utf8_str = utf_e2u(str);
  
  	sv = newSVpv(utf8_str, 0);
  	SvUTF8_on(sv);
  
! 	pfree(utf8_str);
  
  	return sv;
  }
--- 69,91 ----
  cstr2sv(const char *str)
  {
  	SV *sv;
! 	char *utf8_str;
  
+ 	/*
+ 	 * we dont do any conversion when SQL_ASCII leave it as byte soup
+ 	 */
+ 	if (GetDatabaseEncoding() != PG_SQL_ASCII)
+ 		return newSVpv(str, 0);
+ 
+ 	/*
+ 	 * for all other encodings we convert it to utf8
+ 	 */
+ 	utf8_str = utf_e2u(str);
  	sv = newSVpv(utf8_str, 0);
  	SvUTF8_on(sv);
  
! 	if (utf8_str != str)
! 		pfree(utf8_str);
  
  	return sv;
  }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to