Hello, > > Seems like we missed the fact that we still did SvUTF8_on() in sv2cstr > > and SvPVUTF8() when turning a perl string into a cstring. > > Right.
I spent a bit longer time catching on pl/perl and now understand what is the problem... > So I played a bit with this patch, and touched it a bit mainly just to > add some more comments; and while at it I noticed that some of the > functions in Util.xs might leak some memory, so I made an attempt to > plug them, as in the attached patch (which supersedes yours). Ok, Is it ok to look into the newer patch including fix of leaks at first? -- Coding and styles. This also seems to have polished the previous one on some codes, styles and comments which generally look reasonable. And patch style was corrected into unified. -- Functions I seems to work properly on the database the encodings of which are SQL_ASCII and UTF8 (and EUC-JP) as below, ================= => create or replace function foo(text) returns text language plperlu as $$ $a = shift; return "BOO!" if ($a != "a\x80cあ"); return $a; $$; SQL_ASCII=> select foo(E'a\200cあ') = E'a\200cあ'; ?column? ---------- t UTF8=> select foo(E'a\200cあ'); ERROR: invalid byte sequence for encoding "UTF8": 0x80 UTF8=> select foo(E'a\302\200cあ') = E'a\u0080cあ'; ?column? ---------- t ================= This looks quite valid according to the definition of the encodings and perl's nature as far as I see. -- The others Variable naming in util_quote_*() seems a bit confusing, > text *arg = sv2text(sv); > text *ret = DatumGetTextP(..., PointerGetDatum(arg))); > char *str; > pfree(arg); > str = text_to_cstring(ret); > RETVAL = cstr2sv(str); > pfree(str); Renaming ret to quoted and str to ret as the patch attached might make it easily readable. > Now, with my version of the patch applied and using a SQL_ASCII database > to test the problem in the original report, I notice that we now have a > regression failure: snip. > I'm not really sure what to do here -- maybe have a second expected file > for that test is a good enough answer? Or should I just take the test > out? Opinions please. The attached ugly patch does it. We seem should put NO_LOCALE=1 on the 'make check' command line for the encodings not compatible with the environmental locale, although it looks work. # UtfToLocal() seems to have a bug that always report illegal # encoding was "UTF8" regardless of the real encoding. But # plper_lc_*.(sql|out) increases if the bug is fixed. regards, -- Kyotaro Horiguchi NTT Open Source Software Center == My e-mail address has been changed since Apr. 1, 2012.
diff --git a/src/pl/plperl/Util.xs b/src/pl/plperl/Util.xs index 7d0102b..4b4b680 100644 --- a/src/pl/plperl/Util.xs +++ b/src/pl/plperl/Util.xs @@ -67,8 +67,11 @@ static text * sv2text(SV *sv) { char *str = sv2cstr(sv); + text *text; - return cstring_to_text(str); + text = cstring_to_text(str); + pfree(str); + return text; } MODULE = PostgreSQL::InServer::Util PREFIX = util_ @@ -113,10 +116,12 @@ util_quote_literal(sv) } else { text *arg = sv2text(sv); - text *ret = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg))); - char *str = text_to_cstring(ret); - RETVAL = cstr2sv(str); - pfree(str); + text *quoted = DatumGetTextP(DirectFunctionCall1(quote_literal, PointerGetDatum(arg))); + char *ret; + pfree(arg); + ret = text_to_cstring(quoted); + RETVAL = cstr2sv(ret); + pfree(ret); } OUTPUT: RETVAL @@ -132,10 +137,12 @@ util_quote_nullable(sv) else { text *arg = sv2text(sv); - text *ret = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg))); - char *str = text_to_cstring(ret); - RETVAL = cstr2sv(str); - pfree(str); + text *quoted = DatumGetTextP(DirectFunctionCall1(quote_nullable, PointerGetDatum(arg))); + char *ret; + pfree(arg); + ret = text_to_cstring(quoted); + RETVAL = cstr2sv(ret); + pfree(ret); } OUTPUT: RETVAL @@ -145,14 +152,15 @@ util_quote_ident(sv) SV *sv PREINIT: text *arg; - text *ret; - char *str; + text *quoted; + char *ret; CODE: arg = sv2text(sv); - ret = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg))); - str = text_to_cstring(ret); - RETVAL = cstr2sv(str); - pfree(str); + quoted = DatumGetTextP(DirectFunctionCall1(quote_ident, PointerGetDatum(arg))); + pfree(arg); + ret = text_to_cstring(quoted); + RETVAL = cstr2sv(ret); + pfree(ret); OUTPUT: RETVAL diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h index 1b6648b..ed99194 100644 --- a/src/pl/plperl/plperl_helpers.h +++ b/src/pl/plperl/plperl_helpers.h @@ -3,21 +3,29 @@ /* * convert from utf8 to database encoding + * + * Returns a palloc'ed copy of the original string */ static inline char * -utf_u2e(const char *utf8_str, size_t len) +utf_u2e(char *utf8_str, size_t len) { int enc = GetDatabaseEncoding(); - - char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc); + char *ret; /* - * when we are a PG_UTF8 or SQL_ASCII database pg_do_encoding_conversion() - * will not do any conversion or verification. we need to do it manually - * instead. + * When we are in a PG_UTF8 or SQL_ASCII database + * pg_do_encoding_conversion() will not do any conversion (which is good) + * or verification (not so much), so we need to run the verification step + * separately. */ if (enc == PG_UTF8 || enc == PG_SQL_ASCII) - pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false); + { + pg_verify_mbstr_len(enc, utf8_str, len, false); + ret = utf8_str; + } + else + ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, + len, PG_UTF8, enc); if (ret == utf8_str) ret = pstrdup(ret); @@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len) /* * convert from database encoding to utf8 + * + * Returns a palloc'ed copy of the original string */ static inline char * utf_e2u(const char *str) { - char *ret = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), GetDatabaseEncoding(), PG_UTF8); + char *ret = + (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), + GetDatabaseEncoding(), PG_UTF8); if (ret == str) ret = pstrdup(ret); @@ -41,6 +53,8 @@ utf_e2u(const char *str) /* * Convert an SV to a char * in the current database encoding + * + * Returns a palloc'ed copy of the original string */ static inline char * sv2cstr(SV *sv) @@ -51,7 +65,9 @@ sv2cstr(SV *sv) /* * get a utf8 encoded char * out of perl. *note* it may not be valid utf8! - * + */ + + /* * SvPVutf8() croaks nastily on certain things, like typeglobs and * readonly objects such as $^V. That's a perl bug - it's not supposed to * happen. To avoid crashing the backend, we make a copy of the sv before @@ -63,18 +79,27 @@ sv2cstr(SV *sv) (SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM)) sv = newSVsv(sv); else - + { /* * increase the reference count so we can just SvREFCNT_dec() it when * we are done */ SvREFCNT_inc_simple_void(sv); + } - val = SvPVutf8(sv, len); + /* + * Request the string from Perl, in UTF-8 encoding; but if we're in a + * SQL_ASCII database, just request the byte soup without trying to make it + * UTF8, because that might fail. + */ + if (GetDatabaseEncoding() == PG_SQL_ASCII) + val = SvPV(sv, len); + else + val = SvPVutf8(sv, len); /* - * we use perl's length in the event we had an embedded null byte to - * ensure we error out properly + * Now convert to database encoding. We use perl's length in the event we + * had an embedded null byte to ensure we error out properly. */ res = utf_u2e(val, len); @@ -88,16 +113,20 @@ sv2cstr(SV *sv) * Create a new SV from a string assumed to be in the current database's * encoding. */ - static inline SV * cstr2sv(const char *str) { SV *sv; - char *utf8_str = utf_e2u(str); + char *utf8_str; + + /* no conversion when SQL_ASCII */ + if (GetDatabaseEncoding() == PG_SQL_ASCII) + return newSVpv(str, 0); + + utf8_str = utf_e2u(str); sv = newSVpv(utf8_str, 0); SvUTF8_on(sv); - pfree(utf8_str); return sv;
diff --git a/src/pl/plperl/GNUmakefile b/src/pl/plperl/GNUmakefile index 188d7d2..8ab90a6 100644 --- a/src/pl/plperl/GNUmakefile +++ b/src/pl/plperl/GNUmakefile @@ -44,7 +44,9 @@ PERLCHUNKS = plc_perlboot.pl plc_trusted.pl SHLIB_LINK = $(perl_embed_ldflags) REGRESS_OPTS = --dbname=$(PL_TESTDB) --load-extension=plperl --load-extension=plperlu -REGRESS = plperl plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array +REGRESS_LC0 = $(subst .sql,,$(shell cd sql; ls plperl_lc_$(shell echo $(ENCODING) | tr "A-Z-" "a-z_").sql 2>/dev/null)) +REGRESS_LC = $(if $(REGRESS_LC0),$(REGRESS_LC0),plperl_lc) +REGRESS = plperl $(REGRESS_LC) plperl_trigger plperl_shared plperl_elog plperl_util plperl_init plperlu plperl_array # if Perl can support two interpreters in one backend, # test plperl-and-plperlu cases ifneq ($(PERL),) diff --git a/src/pl/plperl/expected/plperl.out b/src/pl/plperl/expected/plperl.out index df54937..906dc15 100644 --- a/src/pl/plperl/expected/plperl.out +++ b/src/pl/plperl/expected/plperl.out @@ -650,16 +650,6 @@ CONTEXT: PL/Perl anonymous code block DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; ERROR: Useless use of sort in scalar context at line 1. CONTEXT: PL/Perl anonymous code block --- --- Make sure strings are validated --- Should fail for all encodings, as nul bytes are never permitted. --- -CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ - return "abcd\0efg"; -$$ LANGUAGE plperl; -SELECT perl_zerob(); -ERROR: invalid byte sequence for encoding "UTF8": 0x00 -CONTEXT: PL/Perl function "perl_zerob" -- make sure functions marked as VOID without an explicit return work CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ $_SHARED{myquote} = sub { diff --git a/src/pl/plperl/expected/plperl_lc.out b/src/pl/plperl/expected/plperl_lc.out new file mode 100644 index 0000000..4f8c08f --- /dev/null +++ b/src/pl/plperl/expected/plperl_lc.out @@ -0,0 +1,10 @@ +-- +-- Make sure strings are validated +-- Should fail for all encodings, as nul bytes are never permitted. +-- +CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ + return "abcd\0efg"; +$$ LANGUAGE plperl; +SELECT perl_zerob(); +ERROR: invalid byte sequence for encoding "UTF8": 0x00 +CONTEXT: PL/Perl function "perl_zerob" diff --git a/src/pl/plperl/expected/plperl_lc_sql_ascii.out b/src/pl/plperl/expected/plperl_lc_sql_ascii.out new file mode 100644 index 0000000..022c3e2 --- /dev/null +++ b/src/pl/plperl/expected/plperl_lc_sql_ascii.out @@ -0,0 +1,10 @@ +-- +-- Make sure strings are validated +-- Should fail for all encodings, as nul bytes are never permitted. +-- +CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ + return "abcd\0efg"; +$$ LANGUAGE plperl; +SELECT perl_zerob(); +ERROR: invalid byte sequence for encoding "SQL_ASCII": 0x00 +CONTEXT: PL/Perl function "perl_zerob" diff --git a/src/pl/plperl/sql/plperl.sql b/src/pl/plperl/sql/plperl.sql index 84af1fd..a5e3840 100644 --- a/src/pl/plperl/sql/plperl.sql +++ b/src/pl/plperl/sql/plperl.sql @@ -423,15 +423,6 @@ DO $do$ use strict; my $name = "foo"; my $ref = $$name; $do$ LANGUAGE plperl; -- yields "ERROR: Useless use of sort in scalar context." DO $do$ use warnings FATAL => qw(void) ; my @y; my $x = sort @y; 1; $do$ LANGUAGE plperl; --- --- Make sure strings are validated --- Should fail for all encodings, as nul bytes are never permitted. --- -CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ - return "abcd\0efg"; -$$ LANGUAGE plperl; -SELECT perl_zerob(); - -- make sure functions marked as VOID without an explicit return work CREATE OR REPLACE FUNCTION myfuncs() RETURNS void AS $$ $_SHARED{myquote} = sub { diff --git a/src/pl/plperl/sql/plperl_lc.sql b/src/pl/plperl/sql/plperl_lc.sql new file mode 100644 index 0000000..a4a06e7 --- /dev/null +++ b/src/pl/plperl/sql/plperl_lc.sql @@ -0,0 +1,8 @@ +-- +-- Make sure strings are validated +-- Should fail for all encodings, as nul bytes are never permitted. +-- +CREATE OR REPLACE FUNCTION perl_zerob() RETURNS TEXT AS $$ + return "abcd\0efg"; +$$ LANGUAGE plperl; +SELECT perl_zerob(); diff --git a/src/pl/plperl/sql/plperl_lc_sql_ascii.sql b/src/pl/plperl/sql/plperl_lc_sql_ascii.sql new file mode 120000 index 0000000..9da97db --- /dev/null +++ b/src/pl/plperl/sql/plperl_lc_sql_ascii.sql @@ -0,0 +1 @@ +plperl_lc.sql \ No newline at end of file
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers