On Thu, Feb 9, 2012 at 03:21, Christoph Berg <c...@df7cb.de> wrote: > Hi, > > we have a database that is storing strings in various encodings (and > non-encodings, namely the arbitrary byte soup [ ... ] > For this reason, the database uses > sql_ascii encoding
> ...snip... > In sql_ascii databases, utf_e2u does not do any recoding, but then > SvUTF8_on still marks the string as utf-8, while it isn't. > > (Returned values might also need fixing.) > > In my view, this is clearly a bug in pl/perl on sql_ascii databases. Yeah, there was some musing about this over in: http://archives.postgresql.org/pgsql-hackers/2011-02/msg01142.php Seems like we missed the fact that we still did SvUTF8_on() in cstr2sv, and SvPVutf8() in sv2cstr when turning a perl string into a cstring. With the attached I get: => create or replace function perl_white(a text) returns text as $$ return shift; $$ language plperlu; => select perl_white(E'\200'), perl_white(E'\200')::bytea, coalesce(perl_white(E'\200'), 'null'); perl_white | perl_white | coalesce ------------+------------+---------- | \x80 | => select perl_white(E'\401'); perl_white ------------ \x01 (1 row) Does the attached fix the issue for you? I'll note that all the PLs seem to behave a bit differently: => create or replace function py_white(a text) returns text as $$ return a; $$ language plpython3u; => select py_white(E'\200'), py_white(E'\200')::bytea, coalesce(py_white(E'\200'), 'null'); py_white | py_white | coalesce ----------+----------+---------- | | null (1 row) =>select py_white(E'\401'); py_white ---------- \x01 (1 row) => create or replace function tcl_white(text) returns text as $$ return $1; $$ language pltcl; => select tcl_white(E'\200'), tcl_white(E'\200')::bytea, coalesce(tcl_white(E'\200'), 'null'); tcl_white | tcl_white | coalesce -----------+-----------+---------- | \x80 | => select tcl_white(E'\402'); tcl_white ----------- \x02 (1 row)
*** a/src/pl/plperl/plperl_helpers.h --- b/src/pl/plperl/plperl_helpers.h *************** *** 5,23 **** * convert from utf8 to database encoding */ static inline char * ! utf_u2e(const char *utf8_str, size_t len) { ! int enc = GetDatabaseEncoding(); ! ! char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc); /* ! * when we are a PG_UTF8 or SQL_ASCII database ! * pg_do_encoding_conversion() will not do any conversion or ! * verification. we need to do it manually instead. */ if (enc == PG_UTF8 || enc == PG_SQL_ASCII) ! pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false); if (ret == utf8_str) ret = pstrdup(ret); --- 5,24 ---- * convert from utf8 to database encoding */ static inline char * ! utf_u2e(char *utf8_str, size_t len) { ! int enc = GetDatabaseEncoding(); ! char *ret = utf8_str; /* ! * when we are a PG_UTF8 or SQL_ASCII database pg_do_encoding_conversion() ! * will not do any conversion or verification. we need to do it manually ! * instead. */ if (enc == PG_UTF8 || enc == PG_SQL_ASCII) ! pg_verify_mbstr_len(enc, utf8_str, len, false); ! else ! ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc); if (ret == utf8_str) ret = pstrdup(ret); *************** *** 66,72 **** sv2cstr(SV *sv) * we are done */ SvREFCNT_inc(sv); ! val = SvPVutf8(sv, len); /* * we use perl's length in the event we had an embedded null byte to ensure --- 67,80 ---- * we are done */ SvREFCNT_inc(sv); ! /* ! * when SQL_ASCII just treat it as byte soup, that is fetch the string out ! * however it is currently stored by perl ! */ ! if (GetDatabaseEncoding() == PG_SQL_ASCII) ! val = SvPV(sv, len); ! else ! val = SvPVutf8(sv, len); /* * we use perl's length in the event we had an embedded null byte to ensure *************** *** 89,99 **** static inline SV * cstr2sv(const char *str) { SV *sv; ! 
char *utf8_str = utf_e2u(str); sv = newSVpv(utf8_str, 0); SvUTF8_on(sv); - pfree(utf8_str); return sv; --- 97,112 ---- cstr2sv(const char *str) { SV *sv; ! char *utf8_str; ! ! /* no conversion when SQL_ASCII */ ! if (GetDatabaseEncoding() == PG_SQL_ASCII) ! return newSVpv(str, 0); ! ! utf8_str = utf_e2u(str); sv = newSVpv(utf8_str, 0); SvUTF8_on(sv); pfree(utf8_str); return sv;
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers