Re: [Dbdpg-general] Unicode in DBD::Pg

Bruce Momjian Wed, 12 Feb 2003 22:01:38 -0800

Here is the UTF8 patch.  Is this something we want to apply now?

---------------------------------------------------------------------------


Dominic Mitchell wrote:
> Before Christmas, I started a thread in dbi-users about the support for 
> setting the utf8 flag on returned values[1].  I've got another patch 
> now, which is less intrusive.  This adds $dbh->{pg_do_utf8}, which will 
> turn on marking returned data as UTF-8 if necessary.
> 
> I would like this to be considered for inclusion with DBD::Pg, as I feel 
> it's necessary to correct broken behaviour that I am seeing.
> 
> I'm aware that Tim Bunce thinks that a better interface should be found 
> for this sort of thing, and I agree.  But unfortunately, I need to get 
> this problem solved, and the attached patch would be an extremely useful 
> stopgap measure.
> 
> Thanks,
> -Dom
> 
> [1] http:[EMAIL PROTECTED]/msg15428.html
> 
> -- 
> | Semantico: creators of major online resources          |
> |       URL: http://www.semantico.com/                   |
> |       Tel: +44 (1273) 722222                           |
> |   Address: 33 Bond St., Brighton, Sussex, BN1 1RD, UK. |

> ? TESTLOG
> ? TESTLOG-commented-out-utf8-bits
> ? t/.nfs00a6dec800000014
> Index: Pg.pm
> ===================================================================
> RCS file: /usr/local/cvsroot/dbdpg/dbdpg/Pg.pm,v
> retrieving revision 1.17
> diff -u -r1.17 Pg.pm
> --- Pg.pm     30 Dec 2002 04:59:05 -0000      1.17
> +++ Pg.pm     10 Jan 2003 11:59:59 -0000
> @@ -1288,6 +1288,15 @@
>  escaped by a backslash. Any other ASCII character can be used directly in a
>  string constant.
>  
> +=item B<pg_do_utf8> (boolean)
> +
> +PostgreSQL specific attribute.  If true, then the utf8 flag will be
> +turned on for returned character data (if the data is valid utf8).  For
> +details about the utf8 flag, see L<Encode>.  This is only relevant under
> +perl 5.8 and higher.
> +
> +B<NB>: This attribute is experimental and may be subject to change.
> +
>  =item B<pg_INV_READ> (integer, read-only)
>  
>  Constant to be used for the mode in lo_creat and lo_open.
> Index: dbdimp.c
> ===================================================================
> RCS file: /usr/local/cvsroot/dbdpg/dbdpg/dbdimp.c,v
> retrieving revision 1.10
> diff -u -r1.10 dbdimp.c
> --- dbdimp.c  8 Jan 2003 22:08:17 -0000       1.10
> +++ dbdimp.c  10 Jan 2003 12:00:01 -0000
> @@ -470,6 +470,8 @@
>          imp_dbh->pg_auto_escape = newval;
>      } else if (kl==10 && strEQ(key, "pg_bool_tf")) {
>       imp_dbh->pg_bool_tf = newval;
> +    } else if (kl==10 && strEQ(key, "pg_do_utf8")) {
> +        imp_dbh->pg_do_utf8 = newval;
>      } else {
>          return 0;
>      }
> @@ -494,6 +496,8 @@
>          retsv = newSViv((IV)imp_dbh->pg_auto_escape);
>      } else if (kl==10 && strEQ(key, "pg_bool_tf")) {
>       retsv = newSViv((IV)imp_dbh->pg_bool_tf);
> +    } else if (kl==10 && strEQ(key, "pg_do_utf8")) {
> +        retsv = newSViv((IV)imp_dbh->pg_do_utf8);
>      } else if (kl==11 && strEQ(key, "pg_INV_READ")) {
>          retsv = newSViv((IV)INV_READ);
>      } else if (kl==12 && strEQ(key, "pg_INV_WRITE")) {
> @@ -1332,6 +1336,15 @@
>  }
>  
>  
> +int  /* returns 1 if any byte of the NUL-terminated string has bit 7 set, else 0 */
> +is_high_bit_set(val)
> +    char *val;  /* NUL-terminated string to scan; must not be NULL */
> +{
> +    for (; *val; val++)  /* test the current byte BEFORE advancing, so val[0] is checked too */
> +        if (*val & 0x80) return 1;
> +    return 0;  /* pure 7-bit ASCII (or empty string) */
> +}
> +
>  AV *
>  dbd_st_fetch (sth, imp_sth)
>      SV *sth;
> @@ -1403,6 +1416,14 @@
>                  val[val_len] = '\0';
>              }
>              sv_setpvn(sv, val, val_len);
> +         if (imp_dbh->pg_do_utf8) {
> +             SvUTF8_off(sv);
> +             /* XXX Is this all the character data types? */
> +             if (18 == type || 25 == type || 1042 ==type || 1043 == type) {
> +                 if (is_high_bit_set(val) && is_utf8_string(val, val_len))
> +                     SvUTF8_on(sv);
> +             }
> +         }
>          }
>      }
>  
> Index: dbdimp.h
> ===================================================================
> RCS file: /usr/local/cvsroot/dbdpg/dbdpg/dbdimp.h,v
> retrieving revision 1.4
> diff -u -r1.4 dbdimp.h
> --- dbdimp.h  8 Jan 2003 22:08:17 -0000       1.4
> +++ dbdimp.h  10 Jan 2003 12:00:01 -0000
> @@ -23,6 +23,7 @@
>      int         init_commit; /* initialize AutoCommit */
>      int         pg_auto_escape;      /* initialize AutoEscape */
>      int         pg_bool_tf;     /* do bools return 't'/'f' */
> +    int         pg_do_utf8;  /* should we attempt to make utf8 strings? */
>  };
>  
>  /* Define sth implementor data structure */
> Index: t/05fetch.t
> ===================================================================
> RCS file: /usr/local/cvsroot/dbdpg/dbdpg/t/05fetch.t,v
> retrieving revision 1.3
> diff -u -r1.3 05fetch.t
> --- t/05fetch.t       27 Nov 2002 09:24:36 -0000      1.3
> +++ t/05fetch.t       10 Jan 2003 12:00:01 -0000
> @@ -3,7 +3,7 @@
>  use Test::More;
>  
>  if (defined $ENV{DBI_DSN}) {
> -  plan tests => 7;
> +  plan tests => 10;
>  } else {
>    plan skip_all => 'cannot test without DB info';
>  }
> @@ -80,6 +80,30 @@
>  ok($rows == 1,
>     'fetch one row on id'
>    );
> +
> +# Attempt to test whether or not we can get unicode out of the database
> +# correctly.  Reuse the previous sth.
> +SKIP: {
> +  eval "use Encode";
> +  skip "need Encode module for unicode tests", 3 if $@;
> +  local $dbh->{pg_do_utf8} = 1;
> +  $dbh->do("INSERT INTO test (id, name, val) VALUES (4, '\x{0100}dam', 'cow')");
> +  $sth->execute(4);
> +  my ($id, $name) = $sth->fetchrow_array();
> +  ok(Encode::is_utf8($name),
> +     'returned data has utf8 bit set'
> +    );
> +  is(length($name), 4,
> +     'returned utf8 data is not corrupted'
> +    );
> +  $sth->finish();
> +  $sth->execute(1);
> +  my ($id2, $name2) = $sth->fetchrow_array();
> +  ok(! Encode::is_utf8($name2),
> +     'returned ASCII data has not got utf8 bit set'
> +    );
> +  $sth->finish();
> +}
>  
>  $sql = <<SQL;
>         SELECT id

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  [EMAIL PROTECTED]               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073

Re: [Dbdpg-general] Unicode in DBD::Pg

Reply via email to