#!/usr/bin/perl -w

#
# this script shows a bug in DBD::Oracle v.1.22 (and others) execute_array 
# handling with respect to 'perl unicode strings', i.e. strings with the 
# internal utf8 flag set.
#
# according to the docs, these strings should automatically be handled 
# correctly by the driver, but this only works for execute, not execute_array.
# the values SHOULD be correctly stored on utf8 databases, or the correct 
# replacement character used on other databases.
#
# on a database with charset US7ASCII, this can result in database corruption 
# (i.e non ascii values in a varchar2 field), and on a UTF-8 database (UTF8 
# or AL32UTF8) will result in corrupted data.
#
# the test only functions on US7ASCII or UTF8/AL32UTF8 databases.
#
# run as:
#
#    utf8test.pl <oracle connection string>
#
#    eg. utf8test.pl scott/tiger 
#    or  utf8test.pl scott/tiger@//host/service
#    etc.
#

use strict;

use DBI;
use Getopt::Long;
use Encode;

#
# this test does NOT work (although the bug still exists) if
# non-ascii characters from the so-called native (latin-1 usually)
# character set are used, OR if a unicode character is used that
# has an ASCII replacement character other than '?', so be careful
# to use 'wierd' unicode characters here.
#
# the unicode character below looks like a house or keyhole
# it's a non-latin-1, non-ascii character without any ascii replacement 
# character the utf8 encoding of unicode char 6e9 is (0xDB,0xA9) or (219,169)
#
my $utf8_string = "\x{6e9}"; 
my $ascii_string = "A";

sub get_connection_string {
	my $connection_string;
	my $r = GetOptions("server=s" => \$connection_string);
	die "bad options" unless $r;
	$connection_string = shift @ARGV if (!$connection_string && @ARGV);
	die "must supply server connection string: $0 user/pass[\@{tnsname|//host/service}]"
		unless $connection_string;
	return $connection_string;
}

sub determine_db_charset_class {
	my ($dbh) = @_;
	my $sql = q/
--------------
select value from nls_database_parameters where parameter = 'NLS_CHARACTERSET'
	/;

	my ($db_charset) = $dbh->selectrow_array($sql);	

	my $types =  [
		{ type => 'ASCII', charsets => [ 'US7ASCII' ] }
		, { type => 'UTF8', charsets => [ 'UTF8','AL32UTF8' ] }
	];

	for my $t (@$types) {
		if (grep {$db_charset eq $_ } @{$t->{charsets}}) {
			return ($t->{type}, $db_charset);
		}
	}

	die "db charset $db_charset not handled by this testcase";
}

sub create_test_table {
	my ($dbh) = @_;
	my $sql = q/
----------------
create table t__utf8_test 
(
	method varchar2(50)
	, string_type varchar2(50)
	, string varchar2(200) 
)
	/;

	$dbh->do($sql);
	print "test table created\n";
}

sub drop_test_table {
	my ($dbh) = @_;
	my $sql = q/
------------------
drop table t__utf8_test
	/;

	eval {
		$dbh->do($sql);
	};

	if ($@) {
		if ($@ =~ /ORA-00942/) {
			print "test table doesn't exist for drop (ok)\n";
		} else {
			die "unexcepted error dropping table: $@";
		}
	}
}

sub create_statement {
	my ($dbh) = @_;
	return $dbh->prepare(q/
--------------
insert into t__utf8_test values (?,?,?)
	/);
}

sub insert_using_execute {
	my ($dbh) = @_;
	my $sth = create_statement $dbh;
	$sth->execute("execute", "utf8", $utf8_string);
	$sth->execute("execute", "ascii", $ascii_string);
	$dbh->commit;
	print "rows created using execute\n";
}

sub insert_using_execute_array {
	my ($dbh) = @_;
	my $sth = create_statement $dbh;
	my @tuple_status;
	my @types = ("utf8", "ascii");
	my $rows = $sth->execute_array( {
		ArrayTupleStatus => \@tuple_status
		, ArrayTupleFetch => sub {
			my $type = pop @types;
			return undef unless $type;
			my $str = $type eq "utf8"? $utf8_string : $ascii_string;
			return ['execute_array',$type, $str ];
		}
	});

	unless (defined($rows)) {
		die "error during execute_array...";
		# actual error message available via @tuple_status...
	}

	$dbh->commit;
	print "rows created using execute_array\n";
}

sub check_show_results {
	my ($dbh, $dbclass) = @_;
	my $sth = $dbh->prepare(q/
--------------
select method,string_type,dump(string) from t__utf8_test
order by method,string_type
	/);
	$sth->execute;

	print "results: \n";
	printf "  %-15.15s %-6.6s %-40.40s %s\n", 
		"method","type","dump","result";
	printf "  %-15.15s %-6.6s %-40.40s %s\n", 
			"=================","===========","===================================","======";

	while (my ($method, $string_type, $dump) = $sth->fetchrow_array) {
		my $orig_dump = $dump;
		$dump=~s/^.*:\s+//g;
		# extract as 'bytes' and use decode below for utf8 as necessary
		my $str = pack("C*", split /,/, $dump);
		my $result;
		if ($string_type eq "ascii") {
			$result = ($str eq $ascii_string) ? 'PASS':'FAIL';
		} else {
			if ($dbclass eq "ASCII") {
				# shoddy brute force replacement of non-ascii
				(my $tmp = $utf8_string) =~ s/[^[:ascii:]]/?/g;
				$result = $str eq $tmp ? 'PASS':'FAIL';
			} else {
				my $dec_str = Encode::decode_utf8($str);
				$result=$dec_str eq $utf8_string?'PASS':'FAIL';
			}
		}

		printf "  %-15.15s %-6.6s %-40.40s %s\n", 
			$method, $string_type, $orig_dump, $result;
	
	}
	print "\n";
}

sub do_test {
	my ($dbh) = @_;

	my ($dbclass, $charset) = determine_db_charset_class $dbh;

	print "\n======= test for database type $dbclass ($charset) ============\n\n";

	drop_test_table $dbh;
	create_test_table $dbh;
	insert_using_execute $dbh;
	insert_using_execute_array $dbh; 
	check_show_results $dbh, $dbclass;
	drop_test_table $dbh;

	$dbh->disconnect;
}

sub main {
	#
	# ensure environment consistentcy by clearing NLS_LANG
	#
	delete $ENV{NLS_LANG};

	die "utf8_string doesn't have unicode flag set" 
		unless Encode::is_utf8($utf8_string);

	die "ascii_string does have unicode flag set" 
		unless !Encode::is_utf8($ascii_string);

	my $dbh = DBI->connect("dbi:Oracle:", get_connection_string, "", 
		{PrintError=>0, RaiseError=>1,AutoCommit=>0});

	do_test $dbh;
}

main;
exit 1;

