diff --git a/lib/HTML/Form.pm b/lib/HTML/Form.pm index 2a10024..b49d18f 100644 --- a/lib/HTML/Form.pm +++ b/lib/HTML/Form.pm @@ -4,9 +4,12 @@ use strict; use URI; use Carp (); -use vars qw($VERSION); +use vars qw($VERSION $Encode_available); $VERSION = "5.817"; +eval { require Encode }; +$Encode_available = !$@; + my %form_tags = map {$_ => 1} qw(input textarea button select option); my %type2class = ( @@ -151,6 +154,7 @@ sub parse $f = $class->new($attr->{'method'}, $action, $attr->{'enctype'}); + $f->accept_charset($attr->{'accept-charset'}) if $attr->{'accept-charset'}; $f->{attr} = $attr; $f->strict(1) if $strict; %openselect = (); @@ -271,6 +275,7 @@ sub new { $self->{method} = uc(shift || "GET"); $self->{action} = shift || Carp::croak("No action defined"); $self->{enctype} = lc(shift || "application/x-www-form-urlencoded"); + $self->{accept_charset} = "UNKNOWN"; $self->{inputs} = [@_]; $self; } @@ -317,11 +322,22 @@ I to. This method gets/sets the encoding type for the form data. It is a string like "application/x-www-form-urlencoded" or "multipart/form-data". +=item $accept = $form->accept_charset + +=item $form->accept_charset( $new_accept ) + +This method gets/sets the list of charset encodings that the server +processing the form accepts. Current implementation supports only +one-element lists. Default value is "UNKNOWN" which we interpret as a +request to use UTF-8 encoding. To encode character strings you should +have modern perl with Encode module. On older perls this method has no +effect. + =cut BEGIN { # Set up some accesor - for (qw(method action enctype)) { + for (qw(method action enctype accept_charset)) { my $m = $_; no strict 'refs'; *{$m} = sub { @@ -641,6 +657,13 @@ sub make_request my $enctype = $self->{'enctype'}; my @form = $self->form; + my $charset = $self->accept_charset eq "UNKNOWN" ? 'utf-8' : $self->accept_charset; + if ($Encode_available) { + foreach my $fi (@form) { + $fi = Encode::encode($charset, $fi) if utf8::is_utf8($fi); + } + } + if ($method eq "GET") { require HTTP::Request; $uri = URI->new($uri, "http"); diff --git a/t/html/form-unicode.t b/t/html/form-unicode.t new file mode 100644 index 0000000..b574c9c --- /dev/null +++ b/t/html/form-unicode.t @@ -0,0 +1,73 @@ +#!perl -w + +use strict; +use Test qw(plan ok skip); + +eval { require Encode }; +if ($@) { + plan tests => 1; + skip('Skip: Encode not available', 0); + exit; +} +else { + plan tests => 15; +} + +use HTML::Form; + +my @warn; +$SIG{__WARN__} = sub { push(@warn, $_[0]) }; + +my $f = HTML::Form->parse(<<'EOT', "http://localhost/"); +
+ +
+
+EOT + +ok($f->value("name"), ""); +ok($f->accept_charset, "UNKNOWN"); +my $req = $f->click; +ok($req->uri, "http://localhost/abc?name="); + +$f->value(name => "\x{0424}"); # capital cyrillic ef +$req = $f->click; +ok($req->method, "GET"); +ok($req->uri, "http://localhost/abc?name=%D0%A4"); + +$f->method('POST'); +$f->enctype('multipart/form-data'); + +$req = $f->click; +ok($req->uri, "http://localhost/abc"); +ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xD0\xA4\r\n--xYzZY--\r\n"); + +$f->accept_charset('koi8-r'); +$req = $f->click; +ok($req->uri, "http://localhost/abc"); +ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xE6\r\n--xYzZY--\r\n"); + +$f->method('GET'); +$req = $f->click; +ok($req->uri, "http://localhost/abc?name=%E6"); + +$f = HTML::Form->parse(<<'EOT', "http://localhost/"); +
+ +
+
+EOT + +ok($f->accept_charset, 'koi8-r'); + +$f->value(name => "\x{0425}"); # capital cyrillic kha +$req = $f->click; +ok($req->method, "GET"); +ok($req->uri, "http://localhost/abc?name=%E8"); + +$f->method('POST'); +$f->enctype('multipart/form-data'); + +$req = $f->click; +ok($req->uri, "http://localhost/abc"); +ok($req->content, "--xYzZY\r\nContent-Disposition: form-data; name=\"name\"\r\n\r\n\xE8\r\n--xYzZY--\r\n");