[not subscribed so can you keep me in the cc?]

Hi All,

Hope I've got the right target audience.

Attached is a patch which modifies the behavior of DBD-Oracle. It's
against version 1.22.

First the problem:

When using 'execute', the code in dbdimp.c ensures that if a perl
'unicode string' (i.e. a scalar value with the utf8 flag set) is used in
bind_param as a placeholder, that the value will be correctly handled as
a UTF8 string, regardless of the other factors influencing character set
handling (primarily NLS_LANG).

However, this behavior is not present for 'execute_array'.  

The result is:
1) corrupted database (non-ascii strings in a US7ASCII database)
2) corrupted values (interpretation of a octet sequence according to
NLS_LANG etc. when really the octet sequence is utf8)

I tried reporting this bug via bug-dbd-oracle [at] rt.cpan.org but it
didn't work.

The attached patch (hopefully) fixes it.  Here's the approach:

1) during the nested 'for' loops in ora_st_execute_array, keep track in
a flags-per-placeholder array whether the values in the array contain
utf8 values, non-utf8 ('native') values or both

2) when finally binding the column, if there are any utf8 values present
AND either a) there are ONLY utf8 values present, or b) the 'native'
character set (the character set we'd be using if we weren't messing
around with it) is _compatible_ to utf8 (i.e. utf8 or us7ascii), then
switch to utf8.  This logic is largely identical to the code in
dbd_rebind_ph.

However, if mixed utf8 and 'native' scalar values are present for a
'column' of bind values, _AND_ the 'native' character set is not
compatible with utf8, then we croak.

Any thoughts, feedback etc?

Thanks,
David Mansfield
Cobite, Inc.






Only in DBD-Oracle-1.22: blib
diff -ur DBD-Oracle-1.22.orig/dbdimp.c DBD-Oracle-1.22/dbdimp.c
--- DBD-Oracle-1.22.orig/dbdimp.c	2008-07-28 10:50:33.000000000 -0400
+++ DBD-Oracle-1.22/dbdimp.c	2008-08-27 17:24:49.000000000 -0400
@@ -43,8 +43,14 @@
 int is_extproc = 0;
 int dbd_verbose = 0; /* DBD only debugging*/
 
+/* bitflag constants for figuring out how to handle utf8 for array binds */
+#define ARRAY_BIND_NATIVE 0x01
+#define ARRAY_BIND_UTF8   0x02
+#define ARRAY_BIND_MIXED  (ARRAY_BIND_NATIVE|ARRAY_BIND_UTF8)
+
 ub2 charsetid = 0;
 ub2 ncharsetid = 0;
+ub2 us7ascii_csid = 1;
 ub2 utf8_csid = 871;
 ub2 al32utf8_csid = 873;
 ub2 al16utf16_csid = 2000;
@@ -3086,13 +3092,18 @@
 }
 
 static int
-do_bind_array_exec(sth, imp_sth, phs)
+do_bind_array_exec(sth, imp_sth, phs, utf8)
     SV *sth;
     imp_sth_t *imp_sth;
     phs_t *phs;
+    int utf8;
 {
 	dTHX;
     sword status;
+    ub1 csform;
+    ub2 csid;
+    int trace_level = DBIS->debug;
+
     OCIBindByName_log_stat(imp_sth->stmhp, &phs->bndhp, imp_sth->errhp,
             (text*)phs->name, (sb4)strlen(phs->name),
             0,
@@ -3117,6 +3128,73 @@
         oci_error(sth, imp_sth->errhp, status, "OCIBindDynamic");
         return 0;
     }
+
+    /* copied and adapted from dbd_rebind_ph */
+
+    csform = phs->csform;
+
+    if (!csform && (utf8 & ARRAY_BIND_UTF8)) {
+        /* try to default csform to avoid translation through non-unicode */
+                if (CSFORM_IMPLIES_UTF8(SQLCS_IMPLICIT))                /* prefer IMPLICIT */
+                        csform = SQLCS_IMPLICIT;
+                else if (CSFORM_IMPLIES_UTF8(SQLCS_NCHAR))
+                csform = SQLCS_NCHAR;   /* else leave csform == 0 */
+        if (trace_level || dbd_verbose >= 1)
+            PerlIO_printf(DBILOGFP, "do_bind_array_exec() (2): rebinding %s with UTF8 value %s", phs->name,
+                (csform == SQLCS_IMPLICIT) ? "so setting csform=SQLCS_IMPLICIT" :
+                (csform == SQLCS_NCHAR)    ? "so setting csform=SQLCS_NCHAR" :
+            "but neither CHAR nor NCHAR are unicode\n");
+    }
+
+    if (csform) {
+        /* set OCI_ATTR_CHARSET_FORM before we get the default OCI_ATTR_CHARSET_ID */
+        OCIAttrSet_log_stat(phs->bndhp, (ub4) OCI_HTYPE_BIND,
+            &csform, (ub4) 0, (ub4) OCI_ATTR_CHARSET_FORM, imp_sth->errhp, status);
+        if ( status != OCI_SUCCESS ) {
+            oci_error(sth, imp_sth->errhp, status, ora_sql_error(imp_sth,"OCIAttrSet (OCI_ATTR_CHARSET_FORM)"));
+            return 0;
+        }
+    }
+
+    if (!phs->csid_orig) {      /* get the default csid Oracle would use */
+        OCIAttrGet_log_stat(phs->bndhp, OCI_HTYPE_BIND, &phs->csid_orig, (ub4)0 ,
+                OCI_ATTR_CHARSET_ID, imp_sth->errhp, status);
+    }
+
+    /* if app has specified a csid then use that, else use default */
+    csid = (phs->csid) ? phs->csid : phs->csid_orig;
+
+    /* if data is utf8 but charset isn't then switch to utf8 csid if possible */
+    if ((utf8 & ARRAY_BIND_UTF8) && !CS_IS_UTF8(csid)) {
+      /* if the specified or default csid is not utf8 _compatible_ AND we have
+       * mixed utf8 and native (non-utf8) data, then it's a fatal problem 
+       * utf8 _compatible_ means, can be upgraded to utf8, ie. utf8 or ascii */
+      if ((utf8 & ARRAY_BIND_NATIVE) && !CS_IS_UTF8_COMPATIBLE(csid)) {
+	croak("Can't mix utf8 and non-utf8 in array bind");
+      }
+      csid = utf8_csid; /* not al32utf8_csid here on purpose */
+    }
+
+    if (trace_level >= 3 || dbd_verbose >= 3 )
+                PerlIO_printf(DBILOGFP, "do_bind_array_exec(): bind %s <== [array of values] "
+                "(%s, %s, csid %d->%d->%d, ftype %d (%s), csform %d->%d, maxlen %lu, maxdata_size %lu)\n",
+              phs->name, 
+              (phs->is_inout) ? "inout" : "in",
+              (utf8 ? "is-utf8" : "not-utf8"),
+              phs->csid_orig, phs->csid, csid,
+              phs->ftype,sql_typecode_name(phs->ftype), phs->csform, csform,
+              (unsigned long)phs->maxlen, (unsigned long)phs->maxdata_size);
+
+
+    if (csid) {
+        OCIAttrSet_log_stat(phs->bndhp, (ub4) OCI_HTYPE_BIND,
+            &csid, (ub4) 0, (ub4) OCI_ATTR_CHARSET_ID, imp_sth->errhp, status);
+        if ( status != OCI_SUCCESS ) {
+            oci_error(sth, imp_sth->errhp, status, ora_sql_error(imp_sth,"OCIAttrSet (OCI_ATTR_CHARSET_ID)"));
+            return 0;
+        }
+    }
+
     return 1;
 }
 
@@ -3166,6 +3244,7 @@
     char namebuf[30];
     STRLEN len;
     int outparams = (imp_sth->out_params_av) ? AvFILL(imp_sth->out_params_av)+1 : 0;
+    int *utf8_flgs;
 
     if (debug >= 2  || dbd_verbose >=2)
  		PerlIO_printf(DBILOGFP, "  ora_st_execute_array %s count=%d (%s %s %s)...\n",
@@ -3229,18 +3308,22 @@
 
     param_count=DBIc_NUM_PARAMS(imp_sth);
 	phs = safemalloc(param_count*sizeof(*phs));
+	utf8_flgs = safemalloc(param_count*sizeof(int));
     memset(phs, 0, param_count*sizeof(*phs));
+    memset(utf8_flgs, 0, param_count*sizeof(int));
 
    	for(j = 0; (unsigned int) j < exe_count; j++) {
 
     	sv_p = av_fetch(tuples_av, j, 0);
         if(sv_p == NULL) {
             Safefree(phs);
+	    Safefree(utf8_flgs);
              croak("Cannot fetch tuple %d", j);
         }
         sv = *sv_p;
         if(!SvROK(sv) || SvTYPE(SvRV(sv)) != SVt_PVAV) {
             Safefree(phs);
+	    Safefree(utf8_flgs);
             croak("Not an array ref in element %d", j);
         }
         av = (AV*)SvRV(sv);
@@ -3252,11 +3335,13 @@
                                 namebuf, strlen(namebuf), 0);
                if (phs_svp == NULL) {
                     Safefree(phs);
+	            Safefree(utf8_flgs);
                     croak("Can't execute for non-existent placeholder :%d", i);
                }
                phs[i] = (phs_t*)(void*)SvPVX(*phs_svp); /* placeholder struct */
                if(phs[i]->idx < 0) {
                    Safefree(phs);
+	           Safefree(utf8_flgs);
                    croak("Placeholder %d not of ?/:1 type", i);
                 }
                 init_bind_for_array_exec(phs[i]);
@@ -3264,6 +3349,7 @@
             sv_p = av_fetch(av, phs[i]->idx, 0);
             if(sv_p == NULL) {
                 Safefree(phs);
+	        Safefree(utf8_flgs);
                 croak("Cannot fetch value for param %d in entry %d", i, j);
         	}
 
@@ -3281,21 +3367,31 @@
         	/* Find the value length, and increase maxlen if needed. */
         	if(SvROK(sv)) {
         	    Safefree(phs);
+	            Safefree(utf8_flgs);
         	    croak("Can't bind a reference (%s) for param %d, entry %d",
         	    neatsvpv(sv,0), i, j);
         	}
         	if(len > (unsigned int) phs[i]->maxlen)
         	    phs[i]->maxlen = len;
 
+		/* update the utf8_flgs for this value */
+		if (SvUTF8(sv)) {
+		  utf8_flgs[i] |= ARRAY_BIND_UTF8;
+		} 
+		else {
+		  utf8_flgs[i] |= ARRAY_BIND_NATIVE;
+		}
         	/* Do OCI bind calls on last iteration. */
         	if( ((unsigned int) j ) == exe_count - 1 ) {
-        	    if(!do_bind_array_exec(sth, imp_sth, phs[i])) {
+		  if(!do_bind_array_exec(sth, imp_sth, phs[i], utf8_flgs[i])) {
         	        Safefree(phs);
+	                Safefree(utf8_flgs);
         		}
 			}
     	}
   	}
 	Safefree(phs);
+	Safefree(utf8_flgs);
 
     /* Store array of bind typles, for use in OCIBindDynamic() callback. */
     imp_sth->bind_tuples = tuples_av;
Only in DBD-Oracle-1.22: dbdimp.c~
diff -ur DBD-Oracle-1.22.orig/dbdimp.h DBD-Oracle-1.22/dbdimp.h
--- DBD-Oracle-1.22.orig/dbdimp.h	2008-07-28 10:30:03.000000000 -0400
+++ DBD-Oracle-1.22/dbdimp.h	2008-08-27 17:12:06.000000000 -0400
@@ -254,6 +254,7 @@
 
 extern ub2 charsetid;
 extern ub2 ncharsetid;
+extern ub2 us7ascii_csid;
 extern ub2 utf8_csid;
 extern ub2 al32utf8_csid;
 extern ub2 al16utf16_csid;
@@ -261,6 +262,9 @@
 #define CS_IS_UTF8( cs ) \
    (  ( cs == utf8_csid ) || ( cs == al32utf8_csid ) )
 
+#define CS_IS_UTF8_COMPATIBLE( cs ) \
+  ( CS_IS_UTF8(cs) || ( (cs) == us7ascii_csid ) )
+
 #define CS_IS_UTF16( cs ) ( cs == al16utf16_csid )
 
 #define CSFORM_IMPLIED_CSID(csform) \
Only in DBD-Oracle-1.22: dbdimp.h~
Only in DBD-Oracle-1.22: dbdimp.o
Only in DBD-Oracle-1.22: debugfiles.list
Only in DBD-Oracle-1.22: debuglinks.list
Only in DBD-Oracle-1.22: debugsources.list
Only in DBD-Oracle-1.22: Makefile
Only in DBD-Oracle-1.22: mk.pm
Only in DBD-Oracle-1.22: oci8.o
Only in DBD-Oracle-1.22: Oracle.bs
Only in DBD-Oracle-1.22: Oracle.c
Only in DBD-Oracle-1.22: Oracle.o
Only in DBD-Oracle-1.22: Oracle.xsi
Only in DBD-Oracle-1.22: ora_explain
Only in DBD-Oracle-1.22: pm_to_blib

Reply via email to